diff --git a/.github/workflows/test-framework-cli.yaml b/.github/workflows/test-framework-cli.yaml index b318935553..28fae0f83b 100644 --- a/.github/workflows/test-framework-cli.yaml +++ b/.github/workflows/test-framework-cli.yaml @@ -43,8 +43,8 @@ jobs: "^\.github/workflows/test-framework-cli\.yaml" "^apps/framework-cli-e2e/" "^apps/framework-cli/" - "^templates/python/" - "^templates/typescript/" + "^templates/python" + "^templates/typescript" "^packages/" "Cargo.lock" "pnpm-lock.yaml" @@ -716,6 +716,138 @@ jobs: run: | cat ~/.moose/*-cli.log + test-e2e-cluster-typescript: + needs: + [detect-changes, check, test-cli, test-ts-moose-lib, test-py-moose-lib] + if: needs.detect-changes.outputs.should_run == 'true' + name: Test E2E Cluster Support - TypeScript (Node 20) + runs-on: ubuntu-latest + permissions: + contents: read + env: + RUST_BACKTRACE: full + steps: + - name: Install Protoc (Needed for Temporal) + uses: arduino/setup-protoc@v3 + with: + repo-token: ${{ secrets.GITHUB_TOKEN }} + version: "23.x" + + - name: Checkout + uses: actions/checkout@v4 + with: + repository: ${{ github.event.pull_request.head.repo.full_name }} + ref: ${{ github.event.pull_request.head.sha }} + + # Login to Docker hub to get higher rate limits when moose pulls images + - name: Login to Docker Hub + uses: ./.github/actions/docker-login + with: + op-service-account-token: ${{ secrets.OP_SERVICE_ACCOUNT_TOKEN }} + + - uses: pnpm/action-setup@v4 + + - name: Install node + uses: actions/setup-node@v4 + with: + node-version: 20 + cache: "pnpm" + + - name: Get system info + id: system + run: | + echo "version=$(lsb_release -rs)" >> $GITHUB_OUTPUT + echo "distro=$(lsb_release -is)" >> $GITHUB_OUTPUT + + - uses: actions-rust-lang/setup-rust-toolchain@v1 + with: + toolchain: stable + cache: true + cache-shared-key: ${{ runner.os }}-${{ steps.system.outputs.distro }}-${{ steps.system.outputs.version }}-${{ runner.arch }}-rust + cache-on-failure: true + cache-all-crates: true + cache-workspace-crates: true + + - name: Run TypeScript Cluster E2E Tests + run: pnpm install --frozen-lockfile && pnpm --filter=framework-cli-e2e run test -- --grep "TypeScript Cluster Template" + env: + MOOSE_TELEMETRY_ENABLED: false + + - name: Inspect Logs + if: always() + run: | + cat ~/.moose/*-cli.log + + test-e2e-cluster-python: + needs: + [detect-changes, check, test-cli, test-ts-moose-lib, test-py-moose-lib] + if: needs.detect-changes.outputs.should_run == 'true' + name: Test E2E Cluster Support - Python (Python 3.13) + runs-on: ubuntu-latest + permissions: + contents: read + env: + RUST_BACKTRACE: full + steps: + - name: Install Protoc (Needed for Temporal) + uses: arduino/setup-protoc@v3 + with: + repo-token: ${{ secrets.GITHUB_TOKEN }} + version: "23.x" + + - name: Checkout + uses: actions/checkout@v4 + with: + repository: ${{ github.event.pull_request.head.repo.full_name }} + ref: ${{ github.event.pull_request.head.sha }} + + # Login to Docker hub to get higher rate limits when moose pulls images + - name: Login to Docker Hub + uses: ./.github/actions/docker-login + with: + op-service-account-token: ${{ secrets.OP_SERVICE_ACCOUNT_TOKEN }} + + - uses: pnpm/action-setup@v4 + + - name: Install node + uses: actions/setup-node@v4 + with: + node-version: 20 + cache: "pnpm" + + - name: Get system info + id: system + run: | + echo "version=$(lsb_release -rs)" >> $GITHUB_OUTPUT + echo "distro=$(lsb_release -is)" >> $GITHUB_OUTPUT + + - uses: actions-rust-lang/setup-rust-toolchain@v1 + with: + toolchain: stable + cache: true + 
cache-shared-key: ${{ runner.os }}-${{ steps.system.outputs.distro }}-${{ steps.system.outputs.version }}-${{ runner.arch }}-rust + cache-on-failure: true + cache-all-crates: true + cache-workspace-crates: true + + - name: Setup Python 3.13 + uses: actions/setup-python@v4 + with: + python-version: "3.13" + + - name: Upgrade Python build tools + run: pip install --upgrade pip setuptools wheel + + - name: Run Python Cluster E2E Tests + run: pnpm install --frozen-lockfile && pnpm --filter=framework-cli-e2e run test -- --grep "Python Cluster Template" + env: + MOOSE_TELEMETRY_ENABLED: false + + - name: Inspect Logs + if: always() + run: | + cat ~/.moose/*-cli.log + lints: needs: detect-changes if: needs.detect-changes.outputs.should_run == 'true' @@ -776,8 +908,10 @@ jobs: test-e2e-typescript-tests, test-e2e-python-default, test-e2e-python-tests, - test-e2e-backward-compatibility-typescript, +# test-e2e-backward-compatibility-typescript, test-e2e-backward-compatibility-python, + test-e2e-cluster-typescript, + test-e2e-cluster-python, lints, ] if: always() @@ -805,8 +939,9 @@ jobs: [[ "${{ needs.test-e2e-typescript-tests.result }}" == "failure" ]] || \ [[ "${{ needs.test-e2e-python-default.result }}" == "failure" ]] || \ [[ "${{ needs.test-e2e-python-tests.result }}" == "failure" ]] || \ - [[ "${{ needs.test-e2e-backward-compatibility-typescript.result }}" == "failure" ]] || \ [[ "${{ needs.test-e2e-backward-compatibility-python.result }}" == "failure" ]] || \ + [[ "${{ needs.test-e2e-cluster-typescript.result }}" == "failure" ]] || \ + [[ "${{ needs.test-e2e-cluster-python.result }}" == "failure" ]] || \ [[ "${{ needs.lints.result }}" == "failure" ]]; then echo "One or more required jobs failed" exit 1 @@ -820,8 +955,9 @@ jobs: [[ "${{ needs.test-e2e-typescript-tests.result }}" == "success" ]] && \ [[ "${{ needs.test-e2e-python-default.result }}" == "success" ]] && \ [[ "${{ needs.test-e2e-python-tests.result }}" == "success" ]] && \ - [[ "${{ needs.test-e2e-backward-compatibility-typescript.result }}" == "success" ]] && \ [[ "${{ needs.test-e2e-backward-compatibility-python.result }}" == "success" ]] && \ + [[ "${{ needs.test-e2e-cluster-typescript.result }}" == "success" ]] && \ + [[ "${{ needs.test-e2e-cluster-python.result }}" == "success" ]] && \ [[ "${{ needs.lints.result }}" == "success" ]]; then echo "All required jobs succeeded" exit 0 diff --git a/AGENTS.md b/AGENTS.md index cdbf32d22f..1e4c0fa1ff 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -1,117 +1,78 @@ # AGENTS.md -When you are changing MooseStack functionality (either in the language specific libraries or in the Rust core) ALWAYS run the -end-to-end tests to make sure you did not break anything. - -When you change user facing functionality for moose, always add end-to-end tests for the `python-tests` and `typescript-tests` -templates and ALWAYS audit for the documentation for update needs. Those projects are Moose project that should be using Moose code. -The checks if the moose code works as expected should be inside `apps/framework-cli-e2e`. 
- -You can find the logs for moose if you need to troublehoot into `~/.moose/*-cli.log` - -## Commands - -### Build and Development -- **Build all packages**: `pnpm build` (uses Turbo Repo) -- **Development mode**: `pnpm dev` (starts development servers) -- **Linting**: `pnpm lint` -- **Formatting**: `pnpm format` (Prettier on TypeScript/JavaScript files) -- **Clean build artifacts**: `pnpm clean` - -### Rust Components -- **Build Rust**: `cargo build` -- **Run Rust tests**: `cargo test` -- **Lint Rust code**: `cargo clippy --all-targets -- -D warnings` (no warnings allowed) -- **Format Rust code**: `rustfmt --edition 2021 ` - -### Testing - -#### Unit Tests (Library Testing) -- **Rust tests**: `cargo test` -- **TypeScript library tests**: Navigate to `./packages/ts-moose-lib` and run `pnpm test` -- **Python library tests**: Navigate to `./packages/py-moose-lib` and run `pytest` - -Unit tests should be colocated with the library code they test: -- TypeScript library tests: `packages/ts-moose-lib/tests/` -- Python library tests: `packages/py-moose-lib/tests/` -- Rust tests: Inline with code using `#[cfg(test)]` modules - -#### End-to-End Tests (Template Integration Testing) -- **End-to-end tests**: Navigate to `./apps/framework-cli-e2e` and run `pnpm test` - -End-to-end tests verify that complete MooseStack applications work correctly: -- Tests use the templates in `templates/` as working Moose applications -- Templates like `python-tests` and `typescript-tests` are complete MooseStack projects -- E2E tests verify infrastructure creation, data ingestion, API responses, etc. - -#### Testing Distinction: Templates vs Libraries -**IMPORTANT**: Templates are NOT for library unit tests. They are working MooseStack applications. - -- **Templates** (`templates/python-tests`, `templates/typescript-tests`): - - Complete, runnable Moose applications - - Used by E2E tests to verify end-to-end functionality - - Should demonstrate features and serve as examples - - Tested by `apps/framework-cli-e2e` - -- **Library Tests** (`packages/*/tests/`): - - Unit tests for library functionality - - Test individual functions, classes, and modules - - Should be colocated with the library code - - Run independently of the CLI or infrastructure - -## Repository Architecture - -### Monorepo Structure -This is a multi-language monorepo using: -- **PNPM workspaces** with **Turbo Repo** for JavaScript/TypeScript packages -- **Cargo workspace** for Rust components -- **Cross-language integration** between Rust CLI and TypeScript/Python libraries - -### Key Directories -- `apps/`: End-to-end tests, CLI application, docs, and distribution packages - - `framework-cli/`: Main Rust CLI application - - `framework-docs/`: Documentation site - - `framework-cli-e2e/`: End-to-end test suite -- `packages/`: Shared libraries and common dependencies - - `ts-moose-lib/`: TypeScript library for MooseStack - - `py-moose-lib/`: Python library for MooseStack - - `protobuf/`: Protocol buffer definitions -- `templates/`: Standalone Moose project templates - -### Core Technologies -- **Rust**: CLI application, performance-critical components -- **TypeScript**: Developer libraries, web interfaces -- **Python**: Alternative developer library -- **ClickHouse**: OLAP database -- **Redpanda/Kafka**: Streaming platform -- **Temporal**: Workflow orchestration -- **Redis**: Internal state management - -### Architecture Patterns -- **Code-first infrastructure**: Declare tables, streams, APIs in code -- **Type-safe development**: Strong typing across 
TypeScript and Rust -- **Modular design**: Independent modules (OLAP, Streaming, Workflows, APIs) -- **Local-first development**: Full production mirror via `moose dev` - -## Development Guidelines - -### Pre-commit Requirements -- **TypeScript/JavaScript**: Must pass linting and code formating checks (`npx lint-staged`) -- **Rust**: Must pass `cargo clippy --all-targets -- -D warnings` (no warnings permitted) -- **All components**: Tests must pass before PR submission - -### Error Handling (Rust) -- Define error types near their unit of fallibility (no global `Error` type) -- Use `thiserror` for error definitions with `#[derive(thiserror::Error)]` -- Structure errors in layers with context and specific variants -- Never use `anyhow::Result` - refactor to use `thiserror` - -### Code Standards -- **Constants**: Use `const` in Rust, place in `constants.rs` at appropriate module level -- **Newtypes**: Use tuple structs with validation constructors -- **Documentation**: All public APIs must be documented -- **Linting**: Always run `cargo clippy --all-targets -- -D warnings` for Rust code -- Follow existing patterns and conventions in each language - -### Templates -Templates in the `templates/` directory must be able to run in isolation. When modifying templates, verify they can still function as standalone projects. \ No newline at end of file +Multi-language monorepo (Rust CLI + TypeScript/Python libraries) using PNPM workspaces, Turbo Repo, and Cargo workspace. + +**CRITICAL**: When changing MooseStack functionality, ALWAYS run end-to-end tests. When changing user-facing features, add E2E tests to `python-tests`/`typescript-tests` templates AND audit documentation. Logs: `~/.moose/*-cli.log`. Always format the code. + +## Build & Development Commands + +### All Languages +- **Build all**: `pnpm build` (Turbo orchestrates builds) +- **Dev mode**: `pnpm dev` (starts dev servers) +- **Clean**: `pnpm clean` +- **Lint all**: `pnpm lint` +- **Format**: `pnpm format` (Prettier for TS/JS) + +### Rust +- **Build**: `cargo build` +- **Debug CLI**: Use debug build with verbose logging for ALL moose CLI commands: + ```bash + RUST_LOG=debug RUST_BACKTRACE=1 MOOSE_LOGGER__LEVEL=Debug ~/repos/moosestack/target/debug/moose-cli + ``` + Example: `RUST_LOG=debug RUST_BACKTRACE=1 MOOSE_LOGGER__LEVEL=Debug ~/repos/moosestack/target/debug/moose-cli init my-app typescript` +- **Test all**: `cargo test` +- **Test single**: `cargo test ` or `cargo test --package --test ` +- **Lint**: `cargo clippy --all-targets -- -D warnings` (REQUIRED pre-commit, no warnings allowed) +- **Format**: `cargo fmt` + +### TypeScript +- **Test lib**: `cd packages/ts-moose-lib && pnpm test` (runs mocha tests) +- **Test single**: `cd packages/ts-moose-lib && pnpm test --grep "test name pattern"` +- **Typecheck**: `cd packages/ts-moose-lib && pnpm typecheck` + +### Python +- **Test lib**: `cd packages/py-moose-lib && pytest` +- **Test single**: `cd packages/py-moose-lib && pytest tests/test_file.py::test_function_name` +- **Test pattern**: `cd packages/py-moose-lib && pytest -k "test_pattern"` + +### End-to-End Tests +- **Run E2E**: `cd apps/framework-cli-e2e && pnpm test` (includes pretest: cargo build, pnpm build, package templates) +- **Single E2E test**: `cd apps/framework-cli-e2e && pnpm test --grep "test name"` + +## Code Style Guidelines + +### TypeScript/JavaScript +- **Imports**: Group by external deps, internal modules, types; use named exports from barrel files (`index.ts`) +- **Naming**: camelCase for vars/functions, 
PascalCase for types/classes/components, UPPER_SNAKE_CASE for constants +- **Types**: Prefer interfaces for objects, types for unions/intersections; explicit return types on public APIs +- **Unused vars**: Prefix with `_` (e.g., `_unusedParam`) to bypass linting errors +- **Formatting**: Prettier with `experimentalTernaries: true`; auto-formats on commit (Husky + lint-staged) +- **ESLint**: Extends Next.js, Turbo, TypeScript recommended; `@typescript-eslint/no-explicit-any` disabled + +### Rust +- **Error handling**: Use `thiserror` with `#[derive(thiserror::Error)]`; define errors near fallibility unit (NO global `Error` type); NEVER use `anyhow::Result` +- **Naming**: snake_case for functions/vars, PascalCase for types/traits, SCREAMING_SNAKE_CASE for constants +- **Constants**: Place in `constants.rs` at appropriate module level +- **Newtypes**: Use tuple structs with validation constructors (e.g., `struct UserId(String)`) +- **Tests**: Inline with `#[cfg(test)]` modules +- **Documentation**: Required for all public APIs + +### Python +- **Style**: Follow PEP 8; snake_case for functions/vars, PascalCase for classes, UPPER_SNAKE_CASE for constants +- **Types**: Use type hints for function signatures and public APIs +- **Tests**: Use pytest with fixtures and parametrize decorators + +## Repository Structure + +- **`apps/`**: CLI (`framework-cli/`), docs (`framework-docs/`), E2E tests (`framework-cli-e2e/`) +- **`packages/`**: Libraries (`ts-moose-lib/`, `py-moose-lib/`), shared deps, protobuf definitions +- **`templates/`**: Standalone Moose apps used by E2E tests (NOT for unit tests) + +## Testing Philosophy + +- **Library tests** (`packages/*/tests/`): Unit tests colocated with library code +- **Templates** (`templates/python-tests`, `templates/typescript-tests`): Complete Moose apps for E2E testing; must run in isolation + +## Key Technologies + +Rust (CLI), TypeScript (libs/web), Python (lib), ClickHouse (OLAP), Redpanda/Kafka (streaming), Temporal (workflows), Redis (state) \ No newline at end of file diff --git a/Cargo.lock b/Cargo.lock index c0ac1d1df9..aa23183992 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -556,28 +556,6 @@ dependencies = [ "windows-link 0.2.1", ] -[[package]] -name = "chrono-tz" -version = "0.8.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d59ae0466b83e838b81a54256c39d5d7c20b9d7daa10510a242d9b75abd5936e" -dependencies = [ - "chrono", - "chrono-tz-build", - "phf", -] - -[[package]] -name = "chrono-tz-build" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "433e39f13c9a060046954e0592a8d0a4bcb1040125cbf91cb8ee58964cfb350f" -dependencies = [ - "parse-zoneinfo", - "phf", - "phf_codegen", -] - [[package]] name = "cityhash-rs" version = "1.0.1" @@ -664,45 +642,6 @@ dependencies = [ "syn", ] -[[package]] -name = "clickhouse-rs" -version = "1.1.0-alpha.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "802fe62a5480415bcdbb5217b3ea029d748c9a3ce3b884767cf58888e33e7f65" -dependencies = [ - "byteorder", - "chrono", - "chrono-tz", - "clickhouse-rs-cityhash-sys", - "combine", - "crossbeam", - "either", - "futures-core", - "futures-sink", - "futures-util", - "hostname", - "lazy_static", - "log", - "lz4", - "native-tls", - "percent-encoding", - "pin-project", - "thiserror 1.0.69", - "tokio", - "tokio-native-tls", - "url", - "uuid", -] - -[[package]] -name = "clickhouse-rs-cityhash-sys" -version = "0.1.2" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "4baf9d4700a28d6cb600e17ed6ae2b43298a5245f1f76b4eab63027ebfd592b9" -dependencies = [ - "cc", -] - [[package]] name = "clickhouse-types" version = "0.1.0" @@ -844,19 +783,6 @@ dependencies = [ "cfg-if", ] -[[package]] -name = "crossbeam" -version = "0.8.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1137cd7e7fc0fb5d3c5a8678be38ec56e819125d8d7907411fe24ccb943faca8" -dependencies = [ - "crossbeam-channel", - "crossbeam-deque", - "crossbeam-epoch", - "crossbeam-queue", - "crossbeam-utils", -] - [[package]] name = "crossbeam-channel" version = "0.5.15" @@ -1317,16 +1243,6 @@ version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" -[[package]] -name = "fern" -version = "0.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4316185f709b23713e41e3195f90edef7fb00c3ed4adc79769cf09cc762a3b29" -dependencies = [ - "chrono", - "log", -] - [[package]] name = "filetime" version = "0.2.26" @@ -1787,17 +1703,6 @@ dependencies = [ "windows-sys 0.59.0", ] -[[package]] -name = "hostname" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c731c3e10504cc8ed35cfe2f1db4c9274c3d35fa486e3b31df46f068ef3e867" -dependencies = [ - "libc", - "match_cfg", - "winapi", -] - [[package]] name = "http" version = "0.2.12" @@ -2572,9 +2477,6 @@ name = "log" version = "0.4.28" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "34080505efa8e45a4b816c349525ebe327ceaa8559756f0356cba97ef3bf7432" -dependencies = [ - "serde", -] [[package]] name = "logos" @@ -2619,25 +2521,6 @@ dependencies = [ "hashbrown 0.15.5", ] -[[package]] -name = "lz4" -version = "1.28.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a20b523e860d03443e98350ceaac5e71c6ba89aea7d960769ec3ce37f4de5af4" -dependencies = [ - "lz4-sys", -] - -[[package]] -name = "lz4-sys" -version = "1.11.1+lz4-1.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6bd8c0d6c6ed0cd30b3652886bb8711dc4bb01d637a68105a3d5158039b418e6" -dependencies = [ - "cc", - "libc", -] - [[package]] name = "lz4_flex" version = "0.11.5" @@ -2702,12 +2585,6 @@ dependencies = [ "malachite-nz", ] -[[package]] -name = "match_cfg" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ffbee8634e0d45d258acb448e7eaab3fce7a0a467395d4d9f228e3c1f01fb2e4" - [[package]] name = "matchers" version = "0.2.0" @@ -2870,7 +2747,6 @@ dependencies = [ "chrono", "clap", "clickhouse", - "clickhouse-rs", "comfy-table", "config", "constant_time_eq", @@ -2878,7 +2754,6 @@ dependencies = [ "crossterm 0.27.0", "csv", "dotenvy", - "fern", "flate2", "futures", "git2", @@ -2895,14 +2770,13 @@ dependencies = [ "jsonwebtoken", "keyring", "lazy_static", - "log", "logos", "notify", "num-bigint", "num-traits", "openssl", "opentelemetry 0.29.1", - "opentelemetry-appender-log", + "opentelemetry-appender-tracing", "opentelemetry-http 0.29.0", "opentelemetry-otlp 0.29.0", "opentelemetry-semantic-conventions", @@ -2947,6 +2821,7 @@ dependencies = [ "toml_edit 0.22.27", "tonic", "tracing", + "tracing-subscriber", "uuid", "walkdir", ] @@ -3255,13 +3130,15 @@ dependencies = [ ] [[package]] -name = "opentelemetry-appender-log" -version = "0.29.0" +name = "opentelemetry-appender-tracing" +version = "0.29.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "9268ffd1e361eb0cc835d3daad47a52e0159cea7495570ee7c22f7f72cc00dd1" +checksum = "e716f864eb23007bdd9dc4aec381e188a1cee28eecf22066772b5fd822b9727d" dependencies = [ - "log", "opentelemetry 0.29.1", + "tracing", + "tracing-core", + "tracing-subscriber", ] [[package]] @@ -3463,15 +3340,6 @@ dependencies = [ "windows-link 0.2.1", ] -[[package]] -name = "parse-zoneinfo" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f2a05b18d44e2957b88f96ba460715e295bc1d7510468a2f3d3b44535d26c24" -dependencies = [ - "regex", -] - [[package]] name = "password-hash" version = "0.5.0" @@ -5183,12 +5051,24 @@ dependencies = [ [[package]] name = "sqlparser" -version = "0.58.0" +version = "0.59.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec4b661c54b1e4b603b37873a18c59920e4c51ea8ea2cf527d925424dbd4437c" +checksum = "4591acadbcf52f0af60eafbb2c003232b2b4cd8de5f0e9437cb8b1b59046cc0f" dependencies = [ "log", "recursive", + "sqlparser_derive", +] + +[[package]] +name = "sqlparser_derive" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da5fc6819faabb412da764b99d3b713bb55083c11e7e0c00144d386cd6a1939c" +dependencies = [ + "proc-macro2", + "quote", + "syn", ] [[package]] @@ -6012,6 +5892,16 @@ dependencies = [ "tracing-core", ] +[[package]] +name = "tracing-serde" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "704b1aeb7be0d0a84fc9828cae51dab5970fee5088f83d1dd7ee6f6246fc6ff1" +dependencies = [ + "serde", + "tracing-core", +] + [[package]] name = "tracing-subscriber" version = "0.3.20" @@ -6023,12 +5913,15 @@ dependencies = [ "once_cell", "parking_lot", "regex-automata", + "serde", + "serde_json", "sharded-slab", "smallvec", "thread_local", "tracing", "tracing-core", "tracing-log", + "tracing-serde", ] [[package]] diff --git a/apps/framework-cli-e2e/test/backward-compatibility.test.ts b/apps/framework-cli-e2e/test/backward-compatibility.test.ts index 28bb5daaf5..8e85887b5f 100644 --- a/apps/framework-cli-e2e/test/backward-compatibility.test.ts +++ b/apps/framework-cli-e2e/test/backward-compatibility.test.ts @@ -15,7 +15,7 @@ * 5. Asserts that no (or minimal expected) changes are detected * * This is critical for catching breaking changes in infrastructure map format, - * particularly changes like table ID prefixes with database names. 
+ * particularly changes like table ID prefixes with database names and upgrades */ import { spawn, ChildProcess } from "child_process"; @@ -352,19 +352,13 @@ describe("Backward Compatibility Tests", function () { it("should show no changes when running moose plan with new CLI", async function () { this.timeout(TIMEOUTS.TEST_SETUP_MS); - console.log( - `\nRunning 'moose plan' with NEW CLI (${CLI_PATH}) on project initialized with latest published CLI...`, - ); - console.log( - "Querying running dev server (started with old CLI) to get infrastructure map", - ); - // Run moose plan with NEW CLI (querying the running server) // Use the same admin token that was configured for the old dev server try { const TEST_ADMIN_TOKEN = "deadbeefdeadbeefdeadbeefdeadbeef.0123456789abcdef0123456789abcdef"; - const { stdout, stderr } = await execAsync( + + const { stdout } = await execAsync( `"${CLI_PATH}" plan --url "http://localhost:4000" --token "${TEST_ADMIN_TOKEN}"`, { cwd: TEST_PROJECT_DIR, @@ -385,11 +379,6 @@ describe("Backward Compatibility Tests", function () { }, ); - console.log("moose plan stdout:", stdout); - if (stderr) { - console.log("moose plan stderr:", stderr); - } - // Strip ANSI color codes from the output for reliable parsing const stripAnsi = (str: string) => str.replace(/\x1b\[[0-9;]*m/g, ""); const cleanOutput = stripAnsi(stdout); diff --git a/apps/framework-cli-e2e/test/cli-query.test.ts b/apps/framework-cli-e2e/test/cli-query.test.ts new file mode 100644 index 0000000000..5748d4b204 --- /dev/null +++ b/apps/framework-cli-e2e/test/cli-query.test.ts @@ -0,0 +1,474 @@ +/// +/// +/// +/** + * E2E tests for moose query command (ENG-1226) + * + * Tests the query command functionality: + * 1. Execute SQL from command line argument + * 2. Execute SQL from file + * 3. Execute SQL from stdin + * 4. Respect limit parameter + * 5. 
Handle errors gracefully + */ + +import { spawn, ChildProcess } from "child_process"; +import { expect } from "chai"; +import * as fs from "fs"; +import * as path from "path"; +import { promisify } from "util"; + +import { TIMEOUTS } from "./constants"; +import { + waitForServerStart, + createTempTestDirectory, + cleanupTestSuite, + setupTypeScriptProject, +} from "./utils"; + +const execAsync = promisify(require("child_process").exec); + +const CLI_PATH = path.resolve(__dirname, "../../../target/debug/moose-cli"); +const MOOSE_TS_LIB_PATH = path.resolve( + __dirname, + "../../../packages/ts-moose-lib", +); + +describe("moose query command", () => { + let devProcess: ChildProcess; + let testProjectDir: string; + + before(async function () { + this.timeout(TIMEOUTS.TEST_SETUP_MS); + + console.log("\n=== Starting Query Command Test ==="); + + // Create temp test directory + testProjectDir = createTempTestDirectory("query-cmd-test"); + console.log("Test project dir:", testProjectDir); + + // Setup TypeScript project + await setupTypeScriptProject( + testProjectDir, + "typescript-empty", + CLI_PATH, + MOOSE_TS_LIB_PATH, + "test-query-cmd", + "npm", + ); + + // Start moose dev + console.log("\nStarting moose dev..."); + devProcess = spawn(CLI_PATH, ["dev"], { + stdio: "pipe", + cwd: testProjectDir, + }); + + await waitForServerStart( + devProcess, + TIMEOUTS.SERVER_STARTUP_MS, + "development server started", + "http://localhost:4000", + ); + + console.log("✓ Infrastructure ready"); + }); + + after(async function () { + this.timeout(TIMEOUTS.CLEANUP_MS); + console.log("\n=== Cleaning up Query Command Test ==="); + + await cleanupTestSuite(devProcess, testProjectDir, "query-cmd-test", { + logPrefix: "Query Command Test", + }); + }); + + it("should execute simple SELECT query from argument", async function () { + this.timeout(TIMEOUTS.MIGRATION_MS); + + console.log("\n--- Testing query from argument ---"); + + const { stdout } = await execAsync( + `"${CLI_PATH}" query "SELECT 1 as num"`, + { + cwd: testProjectDir, + }, + ); + + console.log("Query output:", stdout); + + expect(stdout).to.include('{"num":1}'); + expect(stdout).to.include("1 rows"); + + console.log("✓ Query from argument works"); + }); + + it("should execute query with multiple rows", async function () { + this.timeout(TIMEOUTS.MIGRATION_MS); + + console.log("\n--- Testing query with multiple rows ---"); + + const { stdout } = await execAsync( + `"${CLI_PATH}" query "SELECT number FROM system.numbers LIMIT 5"`, + { cwd: testProjectDir }, + ); + + const lines = stdout + .trim() + .split("\n") + .filter((l: string) => l.startsWith("{")); + expect(lines.length).to.equal(5); + + // Verify JSON format + lines.forEach((line: string, idx: number) => { + const parsed = JSON.parse(line); + expect(parsed.number).to.equal(idx); + }); + + console.log("✓ Multiple rows returned correctly"); + }); + + it("should execute query from file", async function () { + this.timeout(TIMEOUTS.MIGRATION_MS); + + console.log("\n--- Testing query from file ---"); + + const queryFile = path.join(testProjectDir, "test-query.sql"); + fs.writeFileSync(queryFile, "SELECT 'hello' as greeting, 42 as answer"); + + const { stdout } = await execAsync( + `"${CLI_PATH}" query -f test-query.sql`, + { cwd: testProjectDir }, + ); + + console.log("Query output:", stdout); + + expect(stdout).to.include('"greeting":"hello"'); + expect(stdout).to.include('"answer":42'); + + console.log("✓ Query from file works"); + }); + + it("should execute query from stdin", async function () { + 
this.timeout(TIMEOUTS.MIGRATION_MS); + + console.log("\n--- Testing query from stdin ---"); + + const { stdout } = await execAsync( + `echo "SELECT 'stdin' as source" | "${CLI_PATH}" query`, + { cwd: testProjectDir, shell: "/bin/bash" }, + ); + + console.log("Query output:", stdout); + + expect(stdout).to.include('"source":"stdin"'); + + console.log("✓ Query from stdin works"); + }); + + it("should respect limit parameter", async function () { + this.timeout(TIMEOUTS.MIGRATION_MS); + + console.log("\n--- Testing limit parameter ---"); + + const { stdout } = await execAsync( + `"${CLI_PATH}" query "SELECT number FROM system.numbers" --limit 3`, + { cwd: testProjectDir }, + ); + + const lines = stdout + .trim() + .split("\n") + .filter((l: string) => l.startsWith("{")); + expect(lines.length).to.equal(3); + expect(stdout).to.include("3 rows"); + + console.log("✓ Limit parameter works"); + }); + + it("should handle query errors gracefully", async function () { + this.timeout(TIMEOUTS.MIGRATION_MS); + + console.log("\n--- Testing error handling ---"); + + try { + await execAsync( + `"${CLI_PATH}" query "SELECT * FROM nonexistent_table_xyz"`, + { cwd: testProjectDir }, + ); + expect.fail("Should have thrown an error"); + } catch (error: any) { + expect(error.message).to.include("ClickHouse query error"); + console.log("✓ Query errors handled gracefully"); + } + }); + + describe("format query flag", () => { + it("should format query as Python code", async function () { + this.timeout(TIMEOUTS.MIGRATION_MS); + + console.log("\n--- Testing Python formatting ---"); + + const { stdout } = await execAsync( + `"${CLI_PATH}" query -c python "SELECT * FROM users WHERE email REGEXP '[a-z]+'"`, + { cwd: testProjectDir }, + ); + + console.log("Format output:", stdout); + + expect(stdout).to.include('r"""'); + expect(stdout).to.include( + "SELECT * FROM users WHERE email REGEXP '[a-z]+'", + ); + expect(stdout).to.include('"""'); + expect(stdout).not.to.include("{"); // Should not have JSON output + + console.log("✓ Python formatting works"); + }); + + it("should format query as TypeScript code", async function () { + this.timeout(TIMEOUTS.MIGRATION_MS); + + console.log("\n--- Testing TypeScript formatting ---"); + + const { stdout } = await execAsync( + `"${CLI_PATH}" query -c typescript "SELECT * FROM users"`, + { cwd: testProjectDir }, + ); + + console.log("Format output:", stdout); + + expect(stdout).to.include("`"); + expect(stdout).to.include("SELECT * FROM users"); + expect(stdout).not.to.include("{"); // Should not have JSON output + + console.log("✓ TypeScript formatting works"); + }); + + it("should format query from file", async function () { + this.timeout(TIMEOUTS.MIGRATION_MS); + + console.log("\n--- Testing format from file ---"); + + const queryFile = path.join(testProjectDir, "format-test.sql"); + fs.writeFileSync(queryFile, "SELECT count(*) as total FROM events"); + + const { stdout } = await execAsync( + `"${CLI_PATH}" query -c python -f format-test.sql`, + { cwd: testProjectDir }, + ); + + console.log("Format output:", stdout); + + expect(stdout).to.include('r"""'); + expect(stdout).to.include("SELECT count(*) as total FROM events"); + + console.log("✓ Format from file works"); + }); + + it("should reject invalid language", async function () { + this.timeout(TIMEOUTS.MIGRATION_MS); + + console.log("\n--- Testing invalid language ---"); + + try { + await execAsync(`"${CLI_PATH}" query -c java "SELECT 1"`, { + cwd: testProjectDir, + }); + expect.fail("Should have thrown an error"); + } 
catch (error: any) { + expect(error.message).to.include("Unsupported language"); + console.log("✓ Invalid language rejected"); + } + }); + + it("should accept language aliases", async function () { + this.timeout(TIMEOUTS.MIGRATION_MS); + + console.log("\n--- Testing language aliases ---"); + + const pyResult = await execAsync(`"${CLI_PATH}" query -c py "SELECT 1"`, { + cwd: testProjectDir, + }); + expect(pyResult.stdout).to.include('r"""'); + + const tsResult = await execAsync(`"${CLI_PATH}" query -c ts "SELECT 1"`, { + cwd: testProjectDir, + }); + expect(tsResult.stdout).to.include("`"); + + console.log("✓ Language aliases work"); + }); + + it("should format multi-line SQL with proper indentation", async function () { + this.timeout(TIMEOUTS.MIGRATION_MS); + + console.log("\n--- Testing multi-line SQL ---"); + + const queryFile = path.join(testProjectDir, "multiline-query.sql"); + const multilineSQL = `SELECT + user_id, + email, + created_at +FROM users +WHERE status = 'active' +ORDER BY created_at DESC`; + fs.writeFileSync(queryFile, multilineSQL); + + const { stdout } = await execAsync( + `"${CLI_PATH}" query -c python -f multiline-query.sql`, + { cwd: testProjectDir }, + ); + + console.log("Format output:", stdout); + + expect(stdout).to.include('r"""'); + expect(stdout).to.include(" user_id,"); + expect(stdout).to.include("ORDER BY created_at DESC"); + expect(stdout).to.include('"""'); + + console.log("✓ Multi-line SQL preserved correctly"); + }); + + it("should format SQL with complex regex patterns", async function () { + this.timeout(TIMEOUTS.MIGRATION_MS); + + console.log("\n--- Testing complex regex patterns ---"); + + const complexQuery = `SELECT * FROM logs WHERE message REGEXP '\\\\d{4}-\\\\d{2}-\\\\d{2}\\\\s+\\\\w+'`; + + const { stdout } = await execAsync( + `"${CLI_PATH}" query -c python "${complexQuery}"`, + { cwd: testProjectDir }, + ); + + console.log("Format output:", stdout); + + expect(stdout).to.include('r"""'); + // Raw strings should preserve backslashes + expect(stdout).to.include("\\d{4}"); + expect(stdout).to.include("REGEXP"); + + console.log("✓ Complex regex patterns preserved"); + }); + + it("should format SQL with email regex pattern", async function () { + this.timeout(TIMEOUTS.MIGRATION_MS); + + console.log("\n--- Testing email regex pattern ---"); + + const emailQuery = `SELECT * FROM users WHERE email REGEXP '^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\\\.[a-zA-Z]{2,}$'`; + + const pyResult = await execAsync( + `"${CLI_PATH}" query -c python "${emailQuery}"`, + { cwd: testProjectDir }, + ); + + expect(pyResult.stdout).to.include('r"""'); + expect(pyResult.stdout).to.include("[a-zA-Z0-9._%+-]+"); + + const tsResult = await execAsync( + `"${CLI_PATH}" query -c typescript "${emailQuery}"`, + { cwd: testProjectDir }, + ); + + expect(tsResult.stdout).to.include("`"); + expect(tsResult.stdout).to.include("[a-zA-Z0-9._%+-]+"); + + console.log("✓ Email regex pattern preserved"); + }); + + it("should handle queries with single quotes and backslashes", async function () { + this.timeout(TIMEOUTS.MIGRATION_MS); + + console.log("\n--- Testing quotes and backslashes ---"); + + const queryFile = path.join(testProjectDir, "complex-pattern.sql"); + const complexSQL = `SELECT * FROM data WHERE pattern REGEXP '\\\\b(foo|bar)\\\\b' AND name = 'test'`; + fs.writeFileSync(queryFile, complexSQL); + + const { stdout } = await execAsync( + `"${CLI_PATH}" query -c python -f complex-pattern.sql`, + { cwd: testProjectDir }, + ); + + console.log("Format output:", stdout); + + 
expect(stdout).to.include('r"""'); + expect(stdout).to.include("name = 'test'"); + expect(stdout).to.include("\\b(foo|bar)\\b"); + + console.log("✓ Quotes and backslashes preserved"); + }); + + it("should prettify SQL when --prettify flag is used", async function () { + this.timeout(TIMEOUTS.MIGRATION_MS); + + console.log("\n--- Testing prettify functionality ---"); + + const messyQuery = + "SELECT id, name FROM users WHERE active = 1 ORDER BY name LIMIT 10"; + + const { stdout } = await execAsync( + `"${CLI_PATH}" query -c python -p "${messyQuery}"`, + { cwd: testProjectDir }, + ); + + console.log("Format output:", stdout); + + expect(stdout).to.include('r"""'); + expect(stdout).to.include("SELECT"); + expect(stdout).to.include("FROM"); + expect(stdout).to.include("WHERE"); + expect(stdout).to.include("ORDER BY"); + // Should have line breaks (prettified) + const lines = stdout.split("\n"); + expect(lines.length).to.be.greaterThan(3); + + console.log("✓ Prettify works"); + }); + + it("should prettify complex SQL with TypeScript", async function () { + this.timeout(TIMEOUTS.MIGRATION_MS); + + console.log("\n--- Testing prettify with TypeScript ---"); + + const complexQuery = + "SELECT u.id, u.name, o.total FROM users u LEFT JOIN orders o ON u.id = o.user_id WHERE u.active = 1 AND o.total > 100 ORDER BY o.total DESC"; + + const { stdout } = await execAsync( + `"${CLI_PATH}" query -c typescript -p "${complexQuery}"`, + { cwd: testProjectDir }, + ); + + console.log("Format output:", stdout); + + expect(stdout).to.include("`"); + expect(stdout).to.include("SELECT"); + expect(stdout).to.include("LEFT JOIN"); + expect(stdout).to.include("WHERE"); + expect(stdout).to.include("ORDER BY"); + + console.log("✓ Prettify with TypeScript works"); + }); + + it("should require format-query flag when using prettify", async function () { + this.timeout(TIMEOUTS.MIGRATION_MS); + + console.log("\n--- Testing prettify requires format-query ---"); + + try { + await execAsync(`"${CLI_PATH}" query -p "SELECT 1"`, { + cwd: testProjectDir, + }); + expect.fail("Should have thrown an error"); + } catch (error: any) { + // clap should enforce this requirement + expect(error.message).to.match( + /requires.*format-query|required argument/i, + ); + console.log("✓ Prettify requires format-query flag"); + } + }); + }); +}); diff --git a/apps/framework-cli-e2e/test/cluster.test.ts b/apps/framework-cli-e2e/test/cluster.test.ts new file mode 100644 index 0000000000..8397a75e59 --- /dev/null +++ b/apps/framework-cli-e2e/test/cluster.test.ts @@ -0,0 +1,463 @@ +/// +/// +/// +/** + * Cluster Support E2E Tests + * + * Tests the ON CLUSTER functionality for ClickHouse tables in MooseStack. + * + * The tests verify: + * 1. Tables are created with ON CLUSTER clause when cluster is specified + * 2. ClickHouse clusters are properly configured from moose.config.toml + * 3. cluster_name appears correctly in the infrastructure map + * 4. Mixed environments (some tables with cluster, some without) work correctly + * 5. Both TypeScript and Python SDKs support cluster configuration + * 6. ReplicatedMergeTree with explicit keeper_path/replica_name (no cluster) works correctly + * 7. 
ReplicatedMergeTree with auto-injected params (ClickHouse Cloud mode) works correctly + */ + +import { spawn, ChildProcess } from "child_process"; +import { expect } from "chai"; +import * as fs from "fs"; +import * as path from "path"; +import { promisify } from "util"; +import { createClient } from "@clickhouse/client"; + +// Import constants and utilities +import { + TIMEOUTS, + SERVER_CONFIG, + TEMPLATE_NAMES, + APP_NAMES, + CLICKHOUSE_CONFIG, +} from "./constants"; + +import { + waitForServerStart, + createTempTestDirectory, + setupTypeScriptProject, + setupPythonProject, + cleanupTestSuite, + performGlobalCleanup, + cleanupClickhouseData, + waitForInfrastructureReady, +} from "./utils"; + +const execAsync = promisify(require("child_process").exec); + +const CLI_PATH = path.resolve(__dirname, "../../../target/debug/moose-cli"); +const MOOSE_LIB_PATH = path.resolve( + __dirname, + "../../../packages/ts-moose-lib", +); +const MOOSE_PY_LIB_PATH = path.resolve( + __dirname, + "../../../packages/py-moose-lib", +); + +// Admin API key hash for authentication +const TEST_ADMIN_HASH = + "deadbeefdeadbeefdeadbeefdeadbeef.0123456789abcdef0123456789abcdef"; + +/** + * Query ClickHouse to verify cluster configuration + */ +async function verifyClustersInClickHouse( + expectedClusters: string[], +): Promise { + const client = createClient({ + url: CLICKHOUSE_CONFIG.url, + username: CLICKHOUSE_CONFIG.username, + password: CLICKHOUSE_CONFIG.password, + }); + + try { + const result = await client.query({ + query: "SELECT DISTINCT cluster FROM system.clusters ORDER BY cluster", + format: "JSONEachRow", + }); + + const clusters = await result.json<{ cluster: string }>(); + const clusterNames = clusters.map((row) => row.cluster); + + console.log("Clusters found in ClickHouse:", clusterNames); + + for (const expected of expectedClusters) { + expect( + clusterNames, + `Cluster '${expected}' should be configured in ClickHouse`, + ).to.include(expected); + } + } finally { + await client.close(); + } +} + +/** + * Query inframap to verify cluster_name is set correctly + */ +async function verifyInfraMapClusters( + expectedTables: { name: string; cluster: string | null }[], +): Promise { + const response = await fetch(`${SERVER_CONFIG.url}/admin/inframap`, { + headers: { + Authorization: `Bearer ${TEST_ADMIN_HASH}`, + }, + }); + + if (!response.ok) { + const errorText = await response.text(); + throw new Error( + `inframap endpoint returned ${response.status}: ${errorText}`, + ); + } + + const response_data = await response.json(); + console.log("InfraMap response:", JSON.stringify(response_data, null, 2)); + + // Handle both direct format and wrapped format + const infraMap = response_data.infra_map || response_data; + + expect(infraMap.tables, "InfraMap should have tables field").to.exist; + + console.log("InfraMap tables:", Object.keys(infraMap.tables)); + + for (const expectedTable of expectedTables) { + const tableKey = `local_${expectedTable.name}`; + const table = infraMap.tables[tableKey]; + + expect(table, `Table ${expectedTable.name} should exist in inframap`).to + .exist; + + // Normalize undefined to null for comparison (undefined means field not present) + const actualCluster = + table.cluster_name === undefined ? 
null : table.cluster_name; + expect( + actualCluster, + `Table ${expectedTable.name} should have correct cluster_name`, + ).to.equal(expectedTable.cluster); + } +} + +/** + * Verify that the clickhouse_clusters.xml file was generated + */ +function verifyClusterXmlGenerated(projectDir: string): void { + const clusterXmlPath = path.join( + projectDir, + ".moose/clickhouse_clusters.xml", + ); + + expect( + fs.existsSync(clusterXmlPath), + "clickhouse_clusters.xml should be generated in .moose directory", + ).to.be.true; + + const xmlContent = fs.readFileSync(clusterXmlPath, "utf-8"); + console.log("Generated cluster XML:", xmlContent); + + // Verify XML contains expected cluster definitions + expect(xmlContent).to.include(""); + expect(xmlContent).to.include(""); + expect(xmlContent).to.include(""); + expect(xmlContent).to.include(""); + expect(xmlContent).to.include(""); +} + +/** + * Verify table exists in ClickHouse + * + * Note: ON CLUSTER is a DDL execution directive and is NOT stored in the table schema. + * SHOW CREATE TABLE will never display ON CLUSTER, even if it was used during creation. + * To verify cluster support, we rely on: + * 1. The inframap showing cluster_name (preserved in our state) + * 2. The table being successfully created (which would fail if cluster was misconfigured) + */ +async function verifyTableExists(tableName: string): Promise { + const client = createClient({ + url: CLICKHOUSE_CONFIG.url, + username: CLICKHOUSE_CONFIG.username, + password: CLICKHOUSE_CONFIG.password, + database: CLICKHOUSE_CONFIG.database, + }); + + try { + const result = await client.query({ + query: `SELECT name, engine FROM system.tables WHERE database = '${CLICKHOUSE_CONFIG.database}' AND name = '${tableName}'`, + format: "JSONEachRow", + }); + + const rows = await result.json<{ name: string; engine: string }>(); + expect( + rows.length, + `Table ${tableName} should exist in ClickHouse`, + ).to.equal(1); + console.log(`Table ${tableName} exists with engine: ${rows[0].engine}`); + } finally { + await client.close(); + } +} + +/** + * Configuration for cluster template tests + */ +interface ClusterTestConfig { + language: "typescript" | "python"; + templateName: string; + appName: string; + projectDirSuffix: string; + displayName: string; +} + +const CLUSTER_CONFIGS: ClusterTestConfig[] = [ + { + language: "typescript", + templateName: TEMPLATE_NAMES.TYPESCRIPT_CLUSTER, + appName: APP_NAMES.TYPESCRIPT_CLUSTER, + projectDirSuffix: "ts-cluster", + displayName: "TypeScript Cluster Template", + }, + { + language: "python", + templateName: TEMPLATE_NAMES.PYTHON_CLUSTER, + appName: APP_NAMES.PYTHON_CLUSTER, + projectDirSuffix: "py-cluster", + displayName: "Python Cluster Template", + }, +]; + +/** + * Creates a test suite for a specific cluster template configuration + */ +const createClusterTestSuite = (config: ClusterTestConfig) => { + describe(config.displayName, function () { + let devProcess: ChildProcess | null = null; + let TEST_PROJECT_DIR: string; + + before(async function () { + this.timeout(TIMEOUTS.TEST_SETUP_MS); + + // Verify CLI exists + try { + await fs.promises.access(CLI_PATH, fs.constants.F_OK); + } catch (err) { + console.error( + `CLI not found at ${CLI_PATH}. 
It should be built in the pretest step.`, + ); + throw err; + } + + // Create temporary directory for this test + TEST_PROJECT_DIR = createTempTestDirectory(config.projectDirSuffix); + + // Setup project based on language + if (config.language === "typescript") { + await setupTypeScriptProject( + TEST_PROJECT_DIR, + config.templateName, + CLI_PATH, + MOOSE_LIB_PATH, + config.appName, + "npm", + ); + } else { + await setupPythonProject( + TEST_PROJECT_DIR, + config.templateName, + CLI_PATH, + MOOSE_PY_LIB_PATH, + config.appName, + ); + } + + // Start dev server + console.log("Starting dev server..."); + const devEnv = + config.language === "python" ? + { + ...process.env, + VIRTUAL_ENV: path.join(TEST_PROJECT_DIR, ".venv"), + PATH: `${path.join(TEST_PROJECT_DIR, ".venv", "bin")}:${process.env.PATH}`, + } + : { ...process.env }; + + devProcess = spawn(CLI_PATH, ["dev"], { + stdio: "pipe", + cwd: TEST_PROJECT_DIR, + env: devEnv, + }); + + await waitForServerStart( + devProcess, + TIMEOUTS.SERVER_STARTUP_MS, + SERVER_CONFIG.startupMessage, + SERVER_CONFIG.url, + ); + console.log("Server started, cleaning up old data..."); + await cleanupClickhouseData(); + console.log("Waiting for infrastructure to be ready..."); + await waitForInfrastructureReady(); + console.log("All components ready, starting tests..."); + }); + + after(async function () { + this.timeout(TIMEOUTS.CLEANUP_MS); + await cleanupTestSuite(devProcess, TEST_PROJECT_DIR, config.appName, { + logPrefix: config.displayName, + }); + }); + + it("should create tables with ON CLUSTER clauses", async function () { + this.timeout(TIMEOUTS.SCHEMA_VALIDATION_MS); + + // Verify all tables were created in ClickHouse + const client = createClient({ + url: CLICKHOUSE_CONFIG.url, + username: CLICKHOUSE_CONFIG.username, + password: CLICKHOUSE_CONFIG.password, + database: CLICKHOUSE_CONFIG.database, + }); + + try { + const result = await client.query({ + query: + "SELECT name FROM system.tables WHERE database = 'local' AND name IN ('TableA', 'TableB', 'TableC', 'TableD', 'TableE') ORDER BY name", + format: "JSONEachRow", + }); + + const tables = await result.json<{ name: string }>(); + const tableNames = tables.map((t) => t.name); + + expect(tableNames).to.include("TableA"); + expect(tableNames).to.include("TableB"); + expect(tableNames).to.include("TableC"); + expect(tableNames).to.include("TableD"); + expect(tableNames).to.include("TableE"); + } finally { + await client.close(); + } + }); + + it("should configure ClickHouse clusters from moose.config.toml", async function () { + this.timeout(TIMEOUTS.SCHEMA_VALIDATION_MS); + await verifyClustersInClickHouse(["cluster_a", "cluster_b"]); + }); + + it("should generate clickhouse_clusters.xml file", async function () { + verifyClusterXmlGenerated(TEST_PROJECT_DIR); + }); + + it("should show correct cluster_name in inframap", async function () { + this.timeout(TIMEOUTS.SCHEMA_VALIDATION_MS); + + await verifyInfraMapClusters([ + { name: "TableA", cluster: "cluster_a" }, + { name: "TableB", cluster: "cluster_b" }, + { name: "TableC", cluster: null }, + { name: "TableD", cluster: null }, + { name: "TableE", cluster: null }, + ]); + }); + + it("should create tables successfully with cluster configuration", async function () { + this.timeout(TIMEOUTS.SCHEMA_VALIDATION_MS); + + // Verify tables were created successfully + // (If cluster was misconfigured, table creation would have failed) + await verifyTableExists("TableA"); + await verifyTableExists("TableB"); + await verifyTableExists("TableC"); + await 
verifyTableExists("TableD"); + await verifyTableExists("TableE"); + }); + + it("should create TableD with explicit keeper args and no cluster", async function () { + this.timeout(TIMEOUTS.SCHEMA_VALIDATION_MS); + + // Verify TableD was created with explicit keeper_path and replica_name + const client = createClient({ + url: CLICKHOUSE_CONFIG.url, + username: CLICKHOUSE_CONFIG.username, + password: CLICKHOUSE_CONFIG.password, + database: CLICKHOUSE_CONFIG.database, + }); + + try { + const result = await client.query({ + query: "SHOW CREATE TABLE local.TableD", + format: "JSONEachRow", + }); + + const data = await result.json<{ statement: string }>(); + const createStatement = data[0].statement; + + // Verify it's ReplicatedMergeTree + expect(createStatement).to.include("ReplicatedMergeTree"); + // Verify it has explicit keeper path + expect(createStatement).to.include( + "/clickhouse/tables/{database}/{table}", + ); + // Verify it has explicit replica name + expect(createStatement).to.include("{replica}"); + // Verify it does NOT have ON CLUSTER (since no cluster is specified) + expect(createStatement).to.not.include("ON CLUSTER"); + } finally { + await client.close(); + } + }); + + it("should create TableE with auto-injected params (ClickHouse Cloud mode)", async function () { + this.timeout(TIMEOUTS.SCHEMA_VALIDATION_MS); + + // Verify TableE was created with ReplicatedMergeTree and auto-injected params + const client = createClient({ + url: CLICKHOUSE_CONFIG.url, + username: CLICKHOUSE_CONFIG.username, + password: CLICKHOUSE_CONFIG.password, + database: CLICKHOUSE_CONFIG.database, + }); + + try { + const result = await client.query({ + query: "SHOW CREATE TABLE local.TableE", + format: "JSONEachRow", + }); + + const data = await result.json<{ statement: string }>(); + const createStatement = data[0].statement; + + console.log(`TableE CREATE statement: ${createStatement}`); + + // Verify it's ReplicatedMergeTree + expect(createStatement).to.include("ReplicatedMergeTree"); + // Verify it has auto-injected params (Moose injects these in dev mode) + expect(createStatement).to.match(/ReplicatedMergeTree\(/); + // Verify it does NOT have ON CLUSTER (no cluster specified) + expect(createStatement).to.not.include("ON CLUSTER"); + } finally { + await client.close(); + } + }); + }); +}; + +// Global setup to clean Docker state from previous runs (useful for local dev) +// Github hosted runners start with a clean slate. 
+before(async function () { + this.timeout(TIMEOUTS.GLOBAL_CLEANUP_MS); + await performGlobalCleanup( + "Running global setup for cluster tests - cleaning Docker state from previous runs...", + ); +}); + +// Global cleanup to ensure no hanging processes +after(async function () { + this.timeout(TIMEOUTS.GLOBAL_CLEANUP_MS); + await performGlobalCleanup(); +}); + +// Test suite for Cluster Support +describe("Cluster Support E2E Tests", function () { + // Generate test suites for each cluster configuration + CLUSTER_CONFIGS.forEach(createClusterTestSuite); +}); diff --git a/apps/framework-cli-e2e/test/constants.ts b/apps/framework-cli-e2e/test/constants.ts index f6eba08eac..1b1641fd2e 100644 --- a/apps/framework-cli-e2e/test/constants.ts +++ b/apps/framework-cli-e2e/test/constants.ts @@ -96,6 +96,8 @@ export const TEMPLATE_NAMES = { TYPESCRIPT_TESTS: "typescript-tests", PYTHON_DEFAULT: "python", PYTHON_TESTS: "python-tests", + TYPESCRIPT_CLUSTER: "typescript-cluster", + PYTHON_CLUSTER: "python-cluster", } as const; export const APP_NAMES = { @@ -103,4 +105,6 @@ export const APP_NAMES = { TYPESCRIPT_TESTS: "moose-ts-tests-app", PYTHON_DEFAULT: "moose-py-default-app", PYTHON_TESTS: "moose-py-tests-app", + TYPESCRIPT_CLUSTER: "moose-ts-cluster-app", + PYTHON_CLUSTER: "moose-py-cluster-app", } as const; diff --git a/apps/framework-cli-e2e/test/templates.test.ts b/apps/framework-cli-e2e/test/templates.test.ts index 20d50826fb..241a2765e9 100644 --- a/apps/framework-cli-e2e/test/templates.test.ts +++ b/apps/framework-cli-e2e/test/templates.test.ts @@ -292,6 +292,54 @@ const createTemplateTestSuite = (config: TemplateTestConfig) => { } }); + it("should include PRIMARY KEY expression in DDL when configured", async function () { + if (config.isTestsVariant) { + // Test 1: Primary key with hash function + const ddl1 = await getTableDDL("PrimaryKeyExpressionTest", "local"); + const primaryKeyPattern = + config.language === "typescript" ? + "PRIMARY KEY (userId, cityHash64(eventId))" + : "PRIMARY KEY (user_id, cityHash64(event_id))"; + const orderByPattern = + config.language === "typescript" ? + "ORDER BY (userId, cityHash64(eventId), timestamp)" + : "ORDER BY (user_id, cityHash64(event_id), timestamp)"; + + if (!ddl1.includes(primaryKeyPattern)) { + throw new Error( + `PRIMARY KEY expression not found in PrimaryKeyExpressionTest DDL. Expected: ${primaryKeyPattern}. DDL: ${ddl1}`, + ); + } + if (!ddl1.includes(orderByPattern)) { + throw new Error( + `ORDER BY expression not found in PrimaryKeyExpressionTest DDL. Expected: ${orderByPattern}. DDL: ${ddl1}`, + ); + } + + // Test 2: Primary key with different ordering + const ddl2 = await getTableDDL("PrimaryKeyOrderingTest", "local"); + const primaryKeyPattern2 = + config.language === "typescript" ? + "PRIMARY KEY productId" + : "PRIMARY KEY product_id"; + const orderByPattern2 = + config.language === "typescript" ? + "ORDER BY (productId, category, brand)" + : "ORDER BY (product_id, category, brand)"; + + if (!ddl2.includes(primaryKeyPattern2)) { + throw new Error( + `PRIMARY KEY expression not found in PrimaryKeyOrderingTest DDL. Expected: ${primaryKeyPattern2}. DDL: ${ddl2}`, + ); + } + if (!ddl2.includes(orderByPattern2)) { + throw new Error( + `ORDER BY expression not found in PrimaryKeyOrderingTest DDL. Expected: ${orderByPattern2}. 
DDL: ${ddl2}`, + ); + } + } + }); + it("should generate FixedString types in DDL including type aliases", async function () { if (config.isTestsVariant && config.language === "python") { const ddl = await getTableDDL("FixedStringTest", "local"); @@ -1078,6 +1126,107 @@ const createTemplateTestSuite = (config: TemplateTestConfig) => { throw new Error("JSON payload not stored as expected"); } }); + + // DateTime precision test for TypeScript + it("should preserve microsecond precision with DateTime64String types via streaming transform", async function () { + this.timeout(TIMEOUTS.TEST_SETUP_MS); + + const testId = randomUUID(); + const now = new Date(); + // Create ISO string with microseconds: 2024-01-15T10:30:00.123456Z + const timestampWithMicroseconds = now + .toISOString() + .replace(/\.\d{3}Z$/, ".123456Z"); + // Nanoseconds + const timestampWithNanoseconds = now + .toISOString() + .replace(/\.\d{3}Z$/, ".123456789Z"); + + console.log( + `Testing DateTime precision with timestamp: ${timestampWithMicroseconds}`, + ); + + // Ingest to DateTimePrecisionInput (which has a transform to Output) + const response = await fetch( + `${SERVER_CONFIG.url}/ingest/DateTimePrecisionInput`, + { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + id: testId, + createdAt: now.toISOString(), + timestampMs: now.toISOString(), + timestampUsDate: timestampWithMicroseconds, + timestampUsString: timestampWithMicroseconds, + timestampNs: timestampWithNanoseconds, + createdAtString: now.toISOString(), + }), + }, + ); + + if (!response.ok) { + const text = await response.text(); + throw new Error( + `Failed to ingest DateTimePrecisionInput: ${response.status}: ${text}`, + ); + } + + // Wait for transform to process and write to output table + await waitForDBWrite( + devProcess!, + "DateTimePrecisionOutput", + 1, + 60_000, + "local", + ); + + // Query the output data and verify precision + const client = createClient(CLICKHOUSE_CONFIG); + const result = await client.query({ + query: ` + SELECT + id, + toString(createdAt) as createdAt, + toString(timestampMs) as timestampMs, + toString(timestampUsDate) as timestampUsDate, + toString(timestampUsString) as timestampUsString, + toString(timestampNs) as timestampNs, + toString(createdAtString) as createdAtString + FROM local.DateTimePrecisionOutput + WHERE id = '${testId}' + `, + format: "JSONEachRow", + }); + + const data: any[] = await result.json(); + + if (data.length === 0) { + throw new Error( + `No data found for DateTimePrecisionOutput with id ${testId}`, + ); + } + + const row = data[0]; + console.log("Retrieved row:", row); + + // Verify that DateTime64String<6> preserves microseconds + if (!row.timestampUsString.includes(".123456")) { + throw new Error( + `Expected timestampUsString to preserve microseconds (.123456), got: ${row.timestampUsString}`, + ); + } + + // Verify that DateTime64String<9> preserves nanoseconds + if (!row.timestampNs.includes(".123456789")) { + throw new Error( + `Expected timestampNs to preserve nanoseconds (.123456789), got: ${row.timestampNs}`, + ); + } + + console.log( + "✅ DateTime precision test passed - microseconds preserved", + ); + }); } } else { it("should successfully ingest data and verify through consumption API", async function () { @@ -1341,6 +1490,121 @@ const createTemplateTestSuite = (config: TemplateTestConfig) => { const apiData = await apiResponse.json(); expect(apiData).to.be.an("array"); }); + + // DateTime precision test for Python + it("should preserve microsecond 
precision with clickhouse_datetime64 annotations via streaming transform (PY)", async function () { + this.timeout(TIMEOUTS.TEST_SETUP_MS); + + const testId = randomUUID(); + const now = new Date(); + // Create ISO string with microseconds: 2024-01-15T10:30:00.123456Z + const timestampWithMicroseconds = now + .toISOString() + .replace(/\.\d{3}Z$/, ".123456Z"); + // Nanoseconds + const timestampWithNanoseconds = now + .toISOString() + .replace(/\.\d{3}Z$/, ".123456789Z"); + + console.log( + `Testing DateTime precision (Python) with timestamp: ${timestampWithMicroseconds}`, + ); + + const payload = { + id: testId, + created_at: now.toISOString(), + timestamp_ms: timestampWithMicroseconds, + timestamp_us: timestampWithMicroseconds, + timestamp_ns: timestampWithNanoseconds, + }; + console.log("Sending payload:", JSON.stringify(payload, null, 2)); + + // Ingest to DateTimePrecisionInput (which has a transform to Output) + const response = await fetch( + `${SERVER_CONFIG.url}/ingest/datetimeprecisioninput`, + { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify(payload), + }, + ); + + if (!response.ok) { + const text = await response.text(); + throw new Error( + `Failed to ingest DateTimePrecisionInput (Python): ${response.status}: ${text}`, + ); + } + + // Wait for transform to process and write to output table + await waitForDBWrite( + devProcess!, + "DateTimePrecisionOutput", + 1, + 60_000, + "local", + ); + + // Query the output data and verify precision + const client = createClient(CLICKHOUSE_CONFIG); + const result = await client.query({ + query: ` + SELECT + id, + toString(created_at) as created_at, + toString(timestamp_ms) as timestamp_ms, + toString(timestamp_us) as timestamp_us, + toString(timestamp_ns) as timestamp_ns + FROM local.DateTimePrecisionOutput + WHERE id = '${testId}' + `, + format: "JSONEachRow", + }); + + const data: any[] = await result.json(); + + if (data.length === 0) { + throw new Error( + `No data found for DateTimePrecisionOutput (Python) with id ${testId}`, + ); + } + + const row = data[0]; + console.log("Retrieved row (Python):", JSON.stringify(row, null, 2)); + + // Verify that datetime with clickhouse_datetime64(6) preserves microseconds + if (!row.timestamp_us.includes(".123456")) { + throw new Error( + `Expected timestamp_us to preserve microseconds (.123456), got: ${row.timestamp_us}`, + ); + } + + // Note: Python datetime truncates nanoseconds to microseconds, so we expect .123456 not .123456789 + // Log if nanoseconds were truncated (expected behavior) + if (row.timestamp_ns.includes(".123456789")) { + console.log( + "✅ Nanoseconds preserved in ClickHouse:", + row.timestamp_ns, + ); + } else if (row.timestamp_ns.includes(".123456")) { + console.log( + "⚠️ Nanoseconds truncated to microseconds (expected Python behavior):", + row.timestamp_ns, + ); + } else { + console.log( + "❌ No sub-second precision found in timestamp_ns:", + row.timestamp_ns, + ); + throw new Error( + `Expected timestamp_ns to have at least microseconds (.123456), got: ${row.timestamp_ns}`, + ); + } + + console.log( + "✅ DateTime precision test passed (Python) - microseconds preserved", + ); + }); } } }); diff --git a/apps/framework-cli-e2e/test/utils/schema-definitions.ts b/apps/framework-cli-e2e/test/utils/schema-definitions.ts index ce622995ee..299799ff95 100644 --- a/apps/framework-cli-e2e/test/utils/schema-definitions.ts +++ b/apps/framework-cli-e2e/test/utils/schema-definitions.ts @@ -421,6 +421,25 @@ export const TYPESCRIPT_TEST_SCHEMAS: 
ExpectedTableSchema[] = [ { name: "payloadBasic", type: "JSON(count Int64, name String)" }, ], }, + // Primary Key Expression Tests + { + tableName: "PrimaryKeyExpressionTest", + columns: [ + { name: "userId", type: "String" }, + { name: "eventId", type: "String" }, + { name: "timestamp", type: /DateTime\('UTC'\)/ }, + { name: "category", type: "String" }, + ], + }, + { + tableName: "PrimaryKeyOrderingTest", + columns: [ + { name: "productId", type: "String" }, + { name: "category", type: "String" }, + { name: "brand", type: "String" }, + { name: "timestamp", type: /DateTime\('UTC'\)/ }, + ], + }, ]; // ============ PYTHON TEMPLATE SCHEMA DEFINITIONS ============ @@ -805,6 +824,25 @@ export const PYTHON_TEST_SCHEMAS: ExpectedTableSchema[] = [ { name: "payload_basic", type: "JSON(count Int64, name String)" }, ], }, + // Primary Key Expression Tests + { + tableName: "PrimaryKeyExpressionTest", + columns: [ + { name: "user_id", type: "String" }, + { name: "event_id", type: "String" }, + { name: "timestamp", type: /DateTime\('UTC'\)/ }, + { name: "category", type: "String" }, + ], + }, + { + tableName: "PrimaryKeyOrderingTest", + columns: [ + { name: "product_id", type: "String" }, + { name: "category", type: "String" }, + { name: "brand", type: "String" }, + { name: "timestamp", type: /DateTime\('UTC'\)/ }, + ], + }, ]; // ============ HELPER FUNCTIONS ============ diff --git a/apps/framework-cli/Cargo.toml b/apps/framework-cli/Cargo.toml index 99c977e266..2d3a86015e 100644 --- a/apps/framework-cli/Cargo.toml +++ b/apps/framework-cli/Cargo.toml @@ -12,7 +12,7 @@ homepage = "https://www.fiveonefour.com/moose" [dependencies] posthog514client-rs = { path = "../../packages/posthog514client-rs" } -sqlparser = "0.58.0" +sqlparser = { version = "0.59", features = ["visitor"] } itertools = "0.13.0" openssl = { version = "0.10", features = ["vendored"] } clap = { version = "4.3.17", features = ["derive"] } @@ -28,13 +28,10 @@ serde = { version = "1.0", features = ["derive"] } config = { version = "0.13.1", features = ["toml"] } home = "0.5.5" clickhouse = { version = "0.14.0", features = ["uuid", "native-tls"] } -clickhouse-rs = { version = "1.1.0-alpha.1", features = ["tls"] } handlebars = "5.1" rdkafka = { version = "0.38", features = ["ssl"] } rdkafka-sys = "4.7" # Needed for rd_kafka_wait_destroyed convert_case = "0.6.0" -log = "0.4" -fern = { version = "0.7", features = ["date-based"] } humantime = "2.1.0" uuid = { version = "1.6", features = ["v4", "serde"] } serde_json = { version = "1.0.133", features = ["preserve_order"] } @@ -86,6 +83,7 @@ logos = "0.15.0" # Monitoring tracing = "0.1.40" +tracing-subscriber = { version = "0.3", features = ["env-filter", "json", "fmt"] } opentelemetry_sdk = { version = "0.29.0", features = [ "logs", "metrics", @@ -94,7 +92,7 @@ opentelemetry_sdk = { version = "0.29.0", features = [ ] } opentelemetry-semantic-conventions = "0.29" opentelemetry = "0.29" -opentelemetry-appender-log = "0.29" +opentelemetry-appender-tracing = "0.29.0" opentelemetry-otlp = { version = "0.29.0", default-features = false, features = [ "logs", "metrics", diff --git a/apps/framework-cli/src/cli.rs b/apps/framework-cli/src/cli.rs index c632407248..b0c7aaaefe 100644 --- a/apps/framework-cli/src/cli.rs +++ b/apps/framework-cli/src/cli.rs @@ -18,7 +18,6 @@ use commands::{ }; use config::ConfigError; use display::with_spinner_completion; -use log::{debug, info, warn}; use regex::Regex; use routines::auth::generate_hash_token; use routines::build::build_package; @@ -28,11 +27,13 @@ use 
routines::kafka_pull::write_external_topics; use routines::metrics_console::run_console; use routines::peek::peek; use routines::ps::show_processes; +use routines::query::query; use routines::scripts::{ cancel_workflow, get_workflow_status, list_workflows_history, pause_workflow, run_workflow, terminate_workflow, unpause_workflow, }; use routines::templates::list_available_templates; +use tracing::{debug, info, warn}; use settings::Settings; use std::collections::HashMap; @@ -252,7 +253,13 @@ fn override_project_config_from_url( ) })?; + let clusters = project.clickhouse_config.clusters.clone(); + let additional_databases = project.clickhouse_config.additional_databases.clone(); + project.clickhouse_config = clickhouse_config; + project.clickhouse_config.clusters = clusters; + project.clickhouse_config.additional_databases = additional_databases; + info!( "Overriding project ClickHouse config from CLI: database = {}", project.clickhouse_config.db_name @@ -394,7 +401,7 @@ pub async fn top_command_handler( let save_choice = prompt_user( "\n Would you like to save this connection string to your system keychain for easy `moose db pull` later? [Y/n]", Some("Y"), - Some("You can always pass --connection-string explicitly to override."), + Some("You can always pass --clickhouse-url explicitly to override."), )?; let save = save_choice.trim().is_empty() @@ -421,7 +428,7 @@ pub async fn top_command_handler( let success_message = if let Some(connection_string) = normalized_url { format!( - "\n\n{post_install_message}\n\n🔗 Your ClickHouse connection string:\n{}\n\n📋 After setting up your development environment, open a new terminal and seed your local database:\n moose seed clickhouse --connection-string \"{}\" --limit 1000\n\n💡 Tip: Save the connection string as an environment variable for future use:\n export MOOSE_REMOTE_CLICKHOUSE_URL=\"{}\"\n", + "\n\n{post_install_message}\n\n🔗 Your ClickHouse connection string:\n{}\n\n📋 After setting up your development environment, open a new terminal and seed your local database:\n moose seed clickhouse --clickhouse-url \"{}\" --limit 1000\n\n💡 Tip: Save the connection string as an environment variable for future use:\n export MOOSE_REMOTE_CLICKHOUSE_URL=\"{}\"\n", connection_string, connection_string, connection_string @@ -1256,7 +1263,7 @@ pub async fn top_command_handler( Commands::Db(DbArgs { command: DbCommands::Pull { - connection_string, + clickhouse_url, file_path, }, }) => { @@ -1270,7 +1277,7 @@ pub async fn top_command_handler( machine_id.clone(), HashMap::new(), ); - let resolved_connection_string: String = match connection_string { + let resolved_clickhouse_url: String = match clickhouse_url { Some(s) => s.clone(), None => { let repo = KeyringSecretRepository; @@ -1278,13 +1285,13 @@ pub async fn top_command_handler( Ok(Some(s)) => s, Ok(None) => return Err(RoutineFailure::error(Message { action: "DB Pull".to_string(), - details: "No connection string provided and none saved. Pass --connection-string or save one during `moose init --from-remote`.".to_string(), + details: "No ClickHouse URL provided and none saved. 
Pass --clickhouse-url or save one during `moose init --from-remote`.".to_string(), })), Err(e) => { return Err(RoutineFailure::error(Message { action: "DB Pull".to_string(), details: format!( - "Failed to read saved connection string from keychain: {e:?}" + "Failed to read saved ClickHouse URL from keychain: {e:?}" ), })); } @@ -1292,7 +1299,7 @@ pub async fn top_command_handler( } }; - db_pull(&resolved_connection_string, &project, file_path.as_deref()) + db_pull(&resolved_clickhouse_url, &project, file_path.as_deref()) .await .map_err(|e| { RoutineFailure::new( @@ -1357,6 +1364,40 @@ pub async fn top_command_handler( ))) } }, + Commands::Query { + query: sql, + file, + limit, + format_query, + prettify, + } => { + info!("Running query command"); + + let project = load_project(commands)?; + let project_arc = Arc::new(project); + + let capture_handle = crate::utilities::capture::capture_usage( + ActivityType::QueryCommand, + Some(project_arc.name()), + &settings, + machine_id.clone(), + HashMap::new(), + ); + + let result = query( + project_arc, + sql.clone(), + file.clone(), + *limit, + format_query.clone(), + *prettify, + ) + .await; + + wait_for_usage_capture(capture_handle).await; + + result + } } } diff --git a/apps/framework-cli/src/cli/commands.rs b/apps/framework-cli/src/cli/commands.rs index 88c7523ac3..02b5247fee 100644 --- a/apps/framework-cli/src/cli/commands.rs +++ b/apps/framework-cli/src/cli/commands.rs @@ -193,6 +193,27 @@ pub enum Commands { }, /// Manage Kafka-related operations Kafka(KafkaArgs), + /// Execute SQL queries against ClickHouse + Query { + /// SQL query to execute + query: Option, + + /// Read query from file + #[arg(short = 'f', long = "file", conflicts_with = "query")] + file: Option, + + /// Maximum number of rows to return (applied via ClickHouse settings) + #[arg(short, long, default_value = "10000")] + limit: u64, + + /// Format query as code literal (python|typescript). Skips execution. + #[arg(short = 'c', long = "format-query", value_name = "LANGUAGE")] + format_query: Option, + + /// Prettify SQL before formatting (only with --format-query) + #[arg(short = 'p', long = "prettify", requires = "format_query")] + prettify: bool, + }, } #[derive(Debug, Args)] @@ -341,9 +362,9 @@ pub struct SeedCommands { pub enum SeedSubcommands { /// Seed ClickHouse tables with data Clickhouse { - /// ClickHouse connection string (e.g. 'clickhouse://explorer@play.clickhouse.com:9440/default') - #[arg(long, value_name = "CONNECTION_STRING")] - connection_string: Option, + /// ClickHouse connection URL (e.g. 'clickhouse://explorer@play.clickhouse.com:9440/default') + #[arg(long, alias = "connection-string")] + clickhouse_url: Option, /// Limit the number of rows to copy per table (default: 1000) #[arg( long, @@ -375,9 +396,9 @@ pub struct DbArgs { pub enum DbCommands { /// Update DB schema for EXTERNALLY_MANAGED tables Pull { - /// ClickHouse connection string (e.g. 'E.g. https://play.clickhouse.com/?user=explorer') - #[arg(long, value_name = "CONNECTION_STRING")] - connection_string: Option, + /// ClickHouse connection URL (e.g. 
'https://play.clickhouse.com/?user=explorer') + #[arg(long, alias = "connection-string")] + clickhouse_url: Option, /// File storing the EXTERNALLY_MANAGED table definitions, defaults to app/external_models.py or app/externalModels.ts #[arg(long)] file_path: Option, diff --git a/apps/framework-cli/src/cli/display/infrastructure.rs b/apps/framework-cli/src/cli/display/infrastructure.rs index 14859fc927..85b4521c2d 100644 --- a/apps/framework-cli/src/cli/display/infrastructure.rs +++ b/apps/framework-cli/src/cli/display/infrastructure.rs @@ -36,7 +36,7 @@ use crate::framework::core::{ plan::InfraPlan, }; use crossterm::{execute, style::Print}; -use log::info; +use tracing::info; /// Create the detail indentation string at compile time /// Computed from ACTION_WIDTH (15) + 3 spaces: @@ -236,11 +236,17 @@ fn format_table_display( details.push(format!("Order by: {}", table.order_by)); } - // Engine section (if present) - if let Some(ref engine) = table.engine { - details.push(format!("Engine: {}", Into::::into(engine.clone()))); + // Cluster section (if present) + if let Some(ref cluster) = table.cluster_name { + details.push(format!("Cluster: {}", cluster)); } + // Engine section + details.push(format!( + "Engine: {}", + Into::::into(table.engine.clone()) + )); + (title, details) } diff --git a/apps/framework-cli/src/cli/display/message_display.rs b/apps/framework-cli/src/cli/display/message_display.rs index 8061836679..909c50955b 100644 --- a/apps/framework-cli/src/cli/display/message_display.rs +++ b/apps/framework-cli/src/cli/display/message_display.rs @@ -7,7 +7,7 @@ use super::{ message::{Message, MessageType}, terminal::{write_styled_line, StyledText}, }; -use log::info; +use tracing::info; /// Displays a message about a batch database insertion. 
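The hunks below in `local_webserver.rs` are the same mechanical migration as the display modules above: `log::` macro calls become `tracing::` calls with identical formatting syntax. A minimal sketch of the before/after shape; the function and field names here are illustrative and not taken from this diff:

```rust
// Before (log + fern), as removed throughout this diff:
//   use log::info;
//   info!("Inserted {} rows into table {}", rows, table);
//
// After (tracing): the positional macro syntax is unchanged, and
// structured fields become available where they are useful.
use tracing::info;

fn report_batch_insert(table: &str, rows: usize) {
    // Hypothetical call site; names are illustrative, not from the patch.
    info!(rows, table = %table, "Batch insert completed");
}
```

Because the call syntax is compatible, most hunks change only the import or the macro path; the dynamic-level `log!` call in `management_router` is the one spot that needed restructuring into explicit trace/debug branches.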
/// diff --git a/apps/framework-cli/src/cli/local_webserver.rs b/apps/framework-cli/src/cli/local_webserver.rs index 36ab73027a..4ac5e6225e 100644 --- a/apps/framework-cli/src/cli/local_webserver.rs +++ b/apps/framework-cli/src/cli/local_webserver.rs @@ -60,8 +60,6 @@ use hyper::Response; use hyper::StatusCode; use hyper_util::rt::TokioIo; use hyper_util::{rt::TokioExecutor, server::conn::auto}; -use log::{debug, log, trace}; -use log::{error, info, warn}; use rdkafka::error::KafkaError; use rdkafka::producer::future_producer::OwnedDeliveryResult; use rdkafka::producer::{DeliveryFuture, FutureProducer, FutureRecord, Producer}; @@ -73,12 +71,12 @@ use serde::Serialize; use serde::{Deserialize, Deserializer}; use serde_json::{json, Deserializer as JsonDeserializer, Value}; use tokio::spawn; +use tracing::{debug, error, info, trace, warn}; use crate::framework::data_model::model::DataModel; use crate::utilities::validate_passthrough::{DataModelArrayVisitor, DataModelVisitor}; use hyper_util::server::graceful::GracefulShutdown; use lazy_static::lazy_static; -use log::Level::{Debug, Trace}; use std::collections::{HashMap, HashSet}; use std::env; use std::env::VarError; @@ -1295,7 +1293,7 @@ async fn send_to_kafka>>( let mut temp_res: Vec> = Vec::new(); for (count, payload) in records.enumerate() { - log::trace!("Sending payload {:?} to topic: {}", payload, topic_name); + tracing::trace!("Sending payload {:?} to topic: {}", payload, topic_name); let record = FutureRecord::to(topic_name) .key(topic_name) // This should probably be generated by the client that pushes data to the API .payload(payload.as_slice()); @@ -1946,17 +1944,21 @@ async fn management_router( req: Request, max_request_body_size: usize, ) -> Result>, hyper::http::Error> { - let level = if req.uri().path().ends_with(METRICS_LOGS_PATH) { - Trace // too many lines of log created without user interaction + // Use appropriate log level based on path + // TRACE for metrics logs to reduce noise, DEBUG for other requests + if req.uri().path().ends_with(METRICS_LOGS_PATH) { + tracing::trace!( + "-> HTTP Request: {:?} - {:?}", + req.method(), + req.uri().path(), + ); } else { - Debug - }; - log!( - level, - "-> HTTP Request: {:?} - {:?}", - req.method(), - req.uri().path(), - ); + tracing::debug!( + "-> HTTP Request: {:?} - {:?}", + req.method(), + req.uri().path(), + ); + } let route = get_path_without_prefix(PathBuf::from(req.uri().path()), path_prefix); let route = route.to_str().unwrap(); @@ -2210,7 +2212,7 @@ impl Webserver { route_table: &'static RwLock>, consumption_apis: &'static RwLock>, ) -> mpsc::Sender<(InfrastructureMap, ApiChange)> { - log::info!("Spawning API update listener"); + tracing::info!("Spawning API update listener"); let (tx, mut rx) = mpsc::channel::<(InfrastructureMap, ApiChange)>(32); @@ -2219,7 +2221,7 @@ impl Webserver { let mut route_table = route_table.write().await; match api_change { ApiChange::ApiEndpoint(Change::Added(api_endpoint)) => { - log::info!("Adding route: {:?}", api_endpoint.path); + tracing::info!("Adding route: {:?}", api_endpoint.path); match api_endpoint.api_type { APIType::INGRESS { target_topic_id, @@ -2273,7 +2275,7 @@ impl Webserver { } } ApiChange::ApiEndpoint(Change::Removed(api_endpoint)) => { - log::info!("Removing route: {:?}", api_endpoint.path); + tracing::info!("Removing route: {:?}", api_endpoint.path); match api_endpoint.api_type { APIType::INGRESS { .. 
} => { route_table.remove(&api_endpoint.path); @@ -2294,7 +2296,7 @@ impl Webserver { data_model, schema: _, } => { - log::info!("Replacing route: {:?} with {:?}", before, after); + tracing::info!("Replacing route: {:?} with {:?}", before, after); let topic = infra_map .find_topic_by_id(target_topic_id) @@ -2347,28 +2349,28 @@ impl Webserver { &self, web_apps: &'static RwLock>, ) -> mpsc::Sender { - log::info!("Spawning WebApp update listener"); + tracing::info!("Spawning WebApp update listener"); let (tx, mut rx) = mpsc::channel::(32); tokio::spawn(async move { while let Some(webapp_change) = rx.recv().await { - log::info!("🔔 Received WebApp change: {:?}", webapp_change); + tracing::info!("🔔 Received WebApp change: {:?}", webapp_change); match webapp_change { crate::framework::core::infrastructure_map::WebAppChange::WebApp( crate::framework::core::infrastructure_map::Change::Added(webapp), ) => { - log::info!("Adding WebApp mount path: {:?}", webapp.mount_path); + tracing::info!("Adding WebApp mount path: {:?}", webapp.mount_path); web_apps.write().await.insert(webapp.mount_path.clone()); - log::info!("✅ Current web_apps: {:?}", *web_apps.read().await); + tracing::info!("✅ Current web_apps: {:?}", *web_apps.read().await); } crate::framework::core::infrastructure_map::WebAppChange::WebApp( crate::framework::core::infrastructure_map::Change::Removed(webapp), ) => { - log::info!("Removing WebApp mount path: {:?}", webapp.mount_path); + tracing::info!("Removing WebApp mount path: {:?}", webapp.mount_path); web_apps.write().await.remove(&webapp.mount_path); - log::info!("✅ Current web_apps: {:?}", *web_apps.read().await); + tracing::info!("✅ Current web_apps: {:?}", *web_apps.read().await); } crate::framework::core::infrastructure_map::WebAppChange::WebApp( crate::framework::core::infrastructure_map::Change::Updated { @@ -2376,7 +2378,7 @@ impl Webserver { after, }, ) => { - log::info!( + tracing::info!( "Updating WebApp mount path: {:?} to {:?}", before.mount_path, after.mount_path @@ -2385,7 +2387,7 @@ impl Webserver { web_apps_guard.remove(&before.mount_path); web_apps_guard.insert(after.mount_path.clone()); drop(web_apps_guard); - log::info!("✅ Current web_apps: {:?}", *web_apps.read().await); + tracing::info!("✅ Current web_apps: {:?}", *web_apps.read().await); } } } @@ -3301,21 +3303,25 @@ async fn get_admin_reconciled_inframap( return Ok(current_map); } - // For admin endpoints, reconcile all currently managed tables only - // Pass the managed table names as target_table_names - this ensures that - // reconcile_with_reality only operates on tables that are already managed by Moose + // For admin endpoints, reconcile all currently managed tables and SQL resources only + // Pass the managed table IDs as target_table_ids - this ensures that + // reconcile_with_reality only operates on resources that are already managed by Moose let target_table_ids: HashSet = current_map .tables .values() .map(|t| t.id(¤t_map.default_database)) .collect(); + let target_sql_resource_ids: HashSet = + current_map.sql_resources.keys().cloned().collect(); + let olap_client = clickhouse::create_client(project.clickhouse_config.clone()); crate::framework::core::plan::reconcile_with_reality( project, ¤t_map, &target_table_ids, + &target_sql_resource_ids, olap_client, ) .await @@ -3525,6 +3531,7 @@ mod tests { use crate::framework::core::partial_infrastructure_map::LifeCycle; use crate::framework::versions::Version; use crate::infrastructure::olap::clickhouse::config::DEFAULT_DATABASE_NAME; + use 
crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine; fn create_test_table(name: &str) -> Table { Table { @@ -3543,7 +3550,7 @@ mod tests { order_by: OrderBy::Fields(vec!["id".to_string()]), partition_by: None, sample_by: None, - engine: None, + engine: ClickhouseEngine::MergeTree, version: Some(Version::from_string("1.0.0".to_string())), source_primitive: PrimitiveSignature { name: "test".to_string(), @@ -3556,6 +3563,8 @@ mod tests { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, + primary_key_expression: None, } } @@ -3570,6 +3579,9 @@ mod tests { unmapped_tables: vec![table.clone()], missing_tables: vec![], mismatched_tables: vec![OlapChange::Table(TableChange::Added(table.clone()))], + unmapped_sql_resources: vec![], + missing_sql_resources: vec![], + mismatched_sql_resources: vec![], }; let result = find_table_definition("test_table", &discrepancies); @@ -3586,6 +3598,9 @@ mod tests { unmapped_tables: vec![test_table.clone()], missing_tables: vec![], mismatched_tables: vec![OlapChange::Table(TableChange::Added(test_table.clone()))], + unmapped_sql_resources: vec![], + missing_sql_resources: vec![], + mismatched_sql_resources: vec![], }; let mut infra_map = create_test_infra_map(); @@ -3618,6 +3633,9 @@ mod tests { unmapped_tables: vec![test_table.clone()], missing_tables: vec![], mismatched_tables: vec![OlapChange::Table(TableChange::Added(test_table.clone()))], + unmapped_sql_resources: vec![], + missing_sql_resources: vec![], + mismatched_sql_resources: vec![], }; let mut infra_map = create_test_infra_map(); diff --git a/apps/framework-cli/src/cli/logger.rs b/apps/framework-cli/src/cli/logger.rs index 684c5fefbf..f5e60520a9 100644 --- a/apps/framework-cli/src/cli/logger.rs +++ b/apps/framework-cli/src/cli/logger.rs @@ -1,21 +1,64 @@ //! # Logger Module //! -//! This module provides logging functionality for the application. +//! This module provides logging functionality using `tracing-subscriber` with support for +//! dynamic log filtering via `RUST_LOG` and dual format support (legacy/modern). +//! +//! ## Architecture +//! +//! The logging system is built using `tracing-subscriber` layers: +//! - **EnvFilter Layer**: Provides `RUST_LOG` support for module-level filtering +//! - **Format Layer**: Either legacy (fern-compatible) or modern (tracing native) format +//! - **OTEL Layer**: Optional OpenTelemetry export for observability platforms //! //! ## Components //! //! - `LoggerLevel`: An enumeration representing the different levels of logging: DEBUG, INFO, WARN, and ERROR. -//! - `LoggerSettings`: A struct that holds the settings for the logger, including the log file's name and the logging level. +//! - `LogFormat`: Either Text or JSON output format. +//! - `LoggerSettings`: A struct that holds the settings for the logger, including format, level, and export options. //! - `setup_logging`: A function used to set up the logging system with the provided settings. +//! - `LegacyFormatLayer`: Custom layer that matches the old fern format exactly (for backward compatibility). +//! +//! ## Features +//! +//! ### RUST_LOG Support +//! Use the standard Rust `RUST_LOG` environment variable for dynamic filtering: +//! ```bash +//! RUST_LOG=moose_cli::infrastructure=debug cargo run +//! RUST_LOG=debug cargo run # Enable debug for all modules +//! ``` +//! +//! ### Dual Format Support +//! - **Legacy Format** (default): Maintains exact compatibility with the old fern-based logging +//! - Text: `[timestamp LEVEL - target] message` +//! 
- JSON: `{"timestamp": "...", "severity": "INFO", "target": "...", "message": "..."}` +//! - **Modern Format** (opt-in): Uses tracing-subscriber's native formatting +//! - Enable via `MOOSE_LOGGER__USE_TRACING_FORMAT=true` +//! +//! ### Additional Features +//! - **Date-based file rotation**: Daily log files in `~/.moose/YYYY-MM-DD-cli.log` +//! - **Automatic cleanup**: Deletes logs older than 7 days +//! - **Session ID tracking**: Optional per-session identifier in logs +//! - **Machine ID tracking**: Included in every log event +//! - **OpenTelemetry export**: Optional OTLP/HTTP JSON export to observability platforms +//! - **Configurable outputs**: File and/or stdout +//! +//! ## Environment Variables +//! +//! - `RUST_LOG`: Standard Rust log filtering (e.g., `RUST_LOG=moose_cli::infrastructure=debug`) +//! - `MOOSE_LOGGER__USE_TRACING_FORMAT`: Opt-in to modern format (default: `false`) +//! - `MOOSE_LOGGER__LEVEL`: Log level (DEBUG, INFO, WARN, ERROR) +//! - `MOOSE_LOGGER__STDOUT`: Output to stdout vs file (default: `false`) +//! - `MOOSE_LOGGER__FORMAT`: Text or JSON (default: Text) +//! - `MOOSE_LOGGER__EXPORT_TO`: OTEL endpoint URL +//! - `MOOSE_LOGGER__INCLUDE_SESSION_ID`: Include session ID in logs (default: `false`) //! //! ## Usage //! //! The logger is configured by creating a `LoggerSettings` instance and passing it to the `setup_logging` function. -//! The `LoggerSettings` can be configured with a log file and a log level. If these are not provided, default values are used. -//! The default log file is "cli.log" in the user's directory, and the default log level is INFO. -//! Use the macros to write to the log file. +//! Default values are provided for all settings. Use the `tracing::` macros to write logs. +//! +//! ### Log Levels //! -//! The log levels have the following uses: //! - `DEBUG`: Use this level for detailed information typically of use only when diagnosing problems. You would usually only expect to see these logs in a development environment. For example, you might log method entry/exit points, variable values, query results, etc. //! - `INFO`: Use this level to confirm that things are working as expected. This is the default log level and will give you general operational insights into the application behavior. For example, you might log start/stop of a process, configuration details, successful completion of significant transactions, etc. //! - `WARN`: Use this level when something unexpected happened in the system, or there might be a problem in the near future (like 'disk space low'). The software is still working as expected, so it's not an error. For example, you might log deprecated API usage, poor performance issues, retrying an operation, etc. @@ -24,31 +67,42 @@ //! ## Example //! //! ```rust +//! use tracing::{debug, info, warn, error}; +//! //! debug!("This is a DEBUG message. Typically used for detailed information useful in a development environment."); //! info!("This is an INFO message. Used to confirm that things are working as expected."); //! warn!("This is a WARN message. Indicates something unexpected happened or there might be a problem in the near future."); //! error!("This is an ERROR message. Used when the system is in distress, customers are probably being affected but the program is not terminated."); //! ``` //! +//! ## Backward Compatibility +//! +//! The legacy format layer ensures 100% backward compatibility with systems consuming the old +//! fern-based log format (e.g., Boreal/hosting_telemetry). 
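The `RUST_LOG` filtering described above maps onto a small amount of `tracing-subscriber` wiring. A minimal, illustrative sketch (not part of this patch), assuming only the `tracing-subscriber` crate with the `env-filter` feature:

```rust
use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt, EnvFilter};

fn init_minimal_logging() {
    // Honor RUST_LOG when present (e.g. RUST_LOG=moose_cli::infrastructure=debug),
    // otherwise fall back to a default level.
    let filter =
        EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new("info"));

    tracing_subscriber::registry()
        .with(filter)
        .with(tracing_subscriber::fmt::layer().compact())
        .init();
}
```

This is the same fallback pattern `setup_modern_format` and `setup_legacy_format` use further down when `RUST_LOG` is unset.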
The modern format can be enabled +//! via environment variable once downstream consumers are ready. use hyper::Uri; -use log::{error, warn}; -use log::{LevelFilter, Metadata, Record}; -use opentelemetry::logs::Logger; use opentelemetry::KeyValue; -use opentelemetry_appender_log::OpenTelemetryLogBridge; -use opentelemetry_otlp::{Protocol, WithExportConfig, WithHttpConfig}; -use opentelemetry_sdk::logs::SdkLoggerProvider; +use opentelemetry_appender_tracing::layer::OpenTelemetryTracingBridge; +use opentelemetry_otlp::{Protocol, WithExportConfig}; +use opentelemetry_sdk::logs::{BatchLogProcessor, SdkLoggerProvider}; use opentelemetry_sdk::Resource; use opentelemetry_semantic_conventions::resource::SERVICE_NAME; use serde::Deserialize; -use serde_json::Value; use std::env; -use std::env::VarError; +use std::fmt; +use std::io::Write; use std::time::{Duration, SystemTime}; +use tracing::field::{Field, Visit}; +use tracing::{warn, Event, Level, Subscriber}; +use tracing_subscriber::filter::LevelFilter; +use tracing_subscriber::fmt::MakeWriter; +use tracing_subscriber::layer::{Context, SubscriberExt}; +use tracing_subscriber::registry::LookupSpan; +use tracing_subscriber::util::SubscriberInitExt; +use tracing_subscriber::{EnvFilter, Layer}; use crate::utilities::constants::{CONTEXT, CTX_SESSION_ID}; -use crate::utilities::decode_object; use super::settings::user_directory; @@ -68,12 +122,12 @@ pub enum LoggerLevel { } impl LoggerLevel { - pub fn to_log_level(&self) -> log::LevelFilter { + pub fn to_tracing_level(&self) -> LevelFilter { match self { - LoggerLevel::Debug => log::LevelFilter::Debug, - LoggerLevel::Info => log::LevelFilter::Info, - LoggerLevel::Warn => log::LevelFilter::Warn, - LoggerLevel::Error => log::LevelFilter::Error, + LoggerLevel::Debug => LevelFilter::DEBUG, + LoggerLevel::Info => LevelFilter::INFO, + LoggerLevel::Warn => LevelFilter::WARN, + LoggerLevel::Error => LevelFilter::ERROR, } } } @@ -101,6 +155,9 @@ pub struct LoggerSettings { #[serde(default = "default_include_session_id")] pub include_session_id: bool, + + #[serde(default = "default_use_tracing_format")] + pub use_tracing_format: bool, } fn parsing_url<'de, D>(deserializer: D) -> Result, D::Error> @@ -131,6 +188,13 @@ fn default_include_session_id() -> bool { false } +fn default_use_tracing_format() -> bool { + env::var("MOOSE_LOGGER__USE_TRACING_FORMAT") + .ok() + .and_then(|v| v.parse().ok()) + .unwrap_or(false) +} + impl Default for LoggerSettings { fn default() -> Self { LoggerSettings { @@ -140,6 +204,7 @@ impl Default for LoggerSettings { format: default_log_format(), export_to: None, include_session_id: default_include_session_id(), + use_tracing_format: default_use_tracing_format(), } } } @@ -193,158 +258,406 @@ fn clean_old_logs() { // Error that rolls up all the possible errors that can occur during logging setup #[derive(thiserror::Error, Debug)] pub enum LoggerError { - #[error("Error Initializing fern logger")] - Init(#[from] fern::InitError), - #[error("Error setting up otel logger")] - Exporter(#[from] opentelemetry_sdk::error::OTelSdkError), - #[error("Error building the exporter")] - ExporterBuild(#[from] opentelemetry_otlp::ExporterBuildError), - #[error("Error setting up default logger")] - LogSetup(#[from] log::SetLoggerError), + #[error("Error setting up OTEL logger: {0}")] + OtelSetup(String), } -pub fn setup_logging(settings: &LoggerSettings, machine_id: &str) -> Result<(), LoggerError> { - clean_old_logs(); +/// Custom fields that get injected into every log event +#[derive(Clone)] +struct 
CustomFields { + session_id: String, + #[allow(dead_code)] // Will be used when OTEL support is re-enabled + machine_id: String, +} - let session_id = CONTEXT.get(CTX_SESSION_ID).unwrap(); - let include_session_id = settings.include_session_id; - - let base_config = fern::Dispatch::new().level(settings.level.to_log_level()); - - let format_config = if settings.format == LogFormat::Text { - fern::Dispatch::new().format(move |out, message, record| { - out.finish(format_args!( - "[{} {}{} - {}] {}", - humantime::format_rfc3339_seconds(SystemTime::now()), - record.level(), - if include_session_id { - format!(" {}", &session_id) - } else { - String::new() - }, - record.target(), - message - )) - }) - } else { - fern::Dispatch::new().format(move |out, message, record| { - let mut log_json = serde_json::json!({ - "timestamp": chrono::Utc::now().to_rfc3339(), - "severity": record.level().to_string(), - "target": record.target(), - "message": message, - }); - - if include_session_id { - log_json["session_id"] = serde_json::Value::String(session_id.to_string()); +/// Layer that formats logs to match the legacy fern format exactly +struct LegacyFormatLayer { + writer: W, + format: LogFormat, + include_session_id: bool, + custom_fields: CustomFields, +} + +impl LegacyFormatLayer { + fn new( + writer: W, + format: LogFormat, + include_session_id: bool, + custom_fields: CustomFields, + ) -> Self { + Self { + writer, + format, + include_session_id, + custom_fields, + } + } + + fn format_text(&self, level: &Level, target: &str, message: &str) -> String { + // Match current fern text format exactly + format!( + "[{} {}{} - {}] {}", + humantime::format_rfc3339_seconds(SystemTime::now()), + level, + if self.include_session_id { + format!(" {}", self.custom_fields.session_id) + } else { + String::new() + }, + target, + message + ) + } + + fn format_json(&self, level: &Level, target: &str, message: &str) -> String { + // Match current fern JSON format exactly + let mut log_json = serde_json::json!({ + "timestamp": chrono::Utc::now().to_rfc3339(), + "severity": level.to_string(), + "target": target, + "message": message, + }); + + if self.include_session_id { + log_json["session_id"] = + serde_json::Value::String(self.custom_fields.session_id.clone()); + } + + serde_json::to_string(&log_json) + .expect("formatting `serde_json::Value` with string keys never fails") + } +} + +impl Layer for LegacyFormatLayer +where + S: Subscriber + for<'a> LookupSpan<'a>, + W: for<'writer> MakeWriter<'writer> + 'static, +{ + fn on_event(&self, event: &Event<'_>, _ctx: Context<'_, S>) { + // Extract metadata + let metadata = event.metadata(); + let level = metadata.level(); + let target = metadata.target(); + + // Extract message using visitor + let mut visitor = MessageVisitor::default(); + event.record(&mut visitor); + let message = visitor.message; + + // Format based on LogFormat + let output = if self.format == LogFormat::Text { + self.format_text(level, target, &message) + } else { + self.format_json(level, target, &message) + }; + + // Write to output + let mut writer = self.writer.make_writer(); + let _ = writer.write_all(output.as_bytes()); + let _ = writer.write_all(b"\n"); + } +} + +#[derive(Default)] +struct MessageVisitor { + message: String, +} + +impl Visit for MessageVisitor { + fn record_debug(&mut self, field: &Field, value: &dyn fmt::Debug) { + if field.name() == "message" { + self.message = format!("{:?}", value); + // Remove surrounding quotes from debug format + if self.message.starts_with('"') && 
self.message.ends_with('"') { + self.message = self.message[1..self.message.len() - 1].to_string(); } + } + } +} - out.finish(format_args!( - "{}", - serde_json::to_string(&log_json) - .expect("formatting `serde_json::Value` with string keys never fails") - )) - }) - }; +/// Custom MakeWriter that creates log files with user-specified date format +/// +/// This maintains backward compatibility with fern's DateBased rotation by allowing +/// custom date format strings like "%Y-%m-%d-cli.log" to produce "2025-11-25-cli.log" +struct DateBasedWriter { + date_format: String, +} - let output_config = if settings.stdout { - format_config.chain(std::io::stdout()) - } else { - format_config.chain(fern::DateBased::new( - // `.join("")` is an idempotent way to ensure the path ends with '/' - user_directory().join("").to_str().unwrap(), - settings.log_file_date_format.clone(), - )) - }; +impl DateBasedWriter { + fn new(date_format: String) -> Self { + Self { date_format } + } +} + +impl<'a> MakeWriter<'a> for DateBasedWriter { + type Writer = std::fs::File; + + fn make_writer(&'a self) -> Self::Writer { + let formatted_name = chrono::Local::now().format(&self.date_format).to_string(); + let file_path = user_directory().join(&formatted_name); + + std::fs::OpenOptions::new() + .create(true) + .append(true) + .open(file_path) + .expect("Failed to open log file") + } +} + +/// Creates a rolling file appender with custom date format +/// +/// This function creates a file appender that respects the configured date format +/// for log file naming, maintaining backward compatibility with fern's DateBased rotation. +fn create_rolling_file_appender(date_format: &str) -> DateBasedWriter { + DateBasedWriter::new(date_format.to_string()) +} - let output_config = match &settings.export_to { - None => output_config, - Some(otel_endpoint) => { - let string_uri = otel_endpoint.to_string(); - let reqwest_client = reqwest::blocking::Client::new(); - - let open_telemetry_exporter = opentelemetry_otlp::LogExporter::builder() - .with_http() - .with_http_client(reqwest_client) - .with_endpoint(string_uri) - .with_protocol(Protocol::HttpJson) - .with_timeout(Duration::from_millis(5000)) - .build()?; - - let mut resource_attributes = vec![ - KeyValue::new(SERVICE_NAME, "moose-cli"), - KeyValue::new("session_id", session_id.as_str()), - KeyValue::new("machine_id", String::from(machine_id)), - ]; - match env::var("MOOSE_METRIC__LABELS") { - Ok(base64) => match decode_object::decode_base64_to_json(&base64) { - Ok(Value::Object(labels)) => { - for (key, value) in labels { - if let Some(value_str) = value.as_str() { - resource_attributes.push(KeyValue::new(key, value_str.to_string())); - } - } +/// Creates an OpenTelemetry layer for log export +/// +/// This function sets up OTLP log export using opentelemetry-appender-tracing. +/// It creates a LoggerProvider with a batch processor and OTLP exporter. 
+fn create_otel_layer( + endpoint: &Uri, + session_id: &str, + machine_id: &str, +) -> Result, LoggerError> { + use crate::utilities::decode_object; + use serde_json::Value; + use std::env::VarError; + + // Create base resource attributes + let mut resource_attributes = vec![ + KeyValue::new(SERVICE_NAME, "moose-cli"), + KeyValue::new("session_id", session_id.to_string()), + KeyValue::new("machine_id", machine_id.to_string()), + ]; + + // Add labels from MOOSE_METRIC__LABELS environment variable + match env::var("MOOSE_METRIC__LABELS") { + Ok(base64) => match decode_object::decode_base64_to_json(&base64) { + Ok(Value::Object(labels)) => { + for (key, value) in labels { + if let Some(value_str) = value.as_str() { + resource_attributes.push(KeyValue::new(key, value_str.to_string())); } - Ok(_) => warn!("Unexpected value for MOOSE_METRIC_LABELS"), - Err(e) => error!("Error decoding MOOSE_METRIC_LABELS: {}", e), - }, - Err(VarError::NotPresent) => {} - Err(VarError::NotUnicode(e)) => { - error!("MOOSE_METRIC__LABELS is not unicode: {:?}", e); } } - - let resource = Resource::builder() - .with_attributes(resource_attributes) - .build(); - let logger_provider = SdkLoggerProvider::builder() - .with_resource(resource) - .with_batch_exporter(open_telemetry_exporter) - .build(); - - let logger: Box = Box::new(TargetToKvLogger { - inner: OpenTelemetryLogBridge::new(&logger_provider), - }); - - fern::Dispatch::new().chain(output_config).chain( - fern::Dispatch::new() - // to prevent exporter recursively calls logging and thus itself - .level(LevelFilter::Off) - .level_for("moose_cli", settings.level.to_log_level()) - .chain(logger), - ) + Ok(_) => warn!("Unexpected value for MOOSE_METRIC_LABELS"), + Err(e) => { + warn!("Error decoding MOOSE_METRIC_LABELS: {}", e); + } + }, + Err(VarError::NotPresent) => {} + Err(VarError::NotUnicode(e)) => { + warn!("MOOSE_METRIC__LABELS is not unicode: {:?}", e); } - }; - base_config.chain(output_config).apply()?; + } - Ok(()) + // Create resource with all attributes + let resource = Resource::builder() + .with_attributes(resource_attributes) + .build(); + + // Build OTLP log exporter + let exporter = opentelemetry_otlp::LogExporter::builder() + .with_http() + .with_protocol(Protocol::HttpJson) + .with_endpoint(endpoint.to_string()) + .build() + .map_err(|e| LoggerError::OtelSetup(format!("Failed to build OTLP exporter: {}", e)))?; + + // Create logger provider with batch processor + let provider = SdkLoggerProvider::builder() + .with_resource(resource) + .with_log_processor(BatchLogProcessor::builder(exporter).build()) + .build(); + + // Create the tracing bridge layer + Ok(OpenTelemetryTracingBridge::new(&provider)) } -struct TargetToKvLogger -where - P: opentelemetry::logs::LoggerProvider + Send + Sync, - L: Logger + Send + Sync, -{ - inner: OpenTelemetryLogBridge, -} +pub fn setup_logging(settings: &LoggerSettings, machine_id: &str) -> Result<(), LoggerError> { + clean_old_logs(); -impl log::Log for TargetToKvLogger -where - P: opentelemetry::logs::LoggerProvider + Send + Sync, - L: Logger + Send + Sync, -{ - fn enabled(&self, metadata: &Metadata) -> bool { - self.inner.enabled(metadata) + let session_id = CONTEXT.get(CTX_SESSION_ID).unwrap(); + + // Create custom fields for use in formatters + let custom_fields = CustomFields { + session_id: session_id.to_string(), + machine_id: machine_id.to_string(), + }; + + // Setup logging based on format type + if settings.use_tracing_format { + // Modern format using tracing built-ins + setup_modern_format(settings, 
session_id, machine_id) + } else { + // Legacy format matching fern exactly + setup_legacy_format(settings, session_id, machine_id, custom_fields) } +} - fn log(&self, record: &Record) { - let mut with_target = record.to_builder(); - let kvs: &dyn log::kv::Source = &("target", record.target()); - with_target.key_values(kvs); - self.inner.log(&with_target.build()); +fn setup_modern_format( + settings: &LoggerSettings, + session_id: &str, + machine_id: &str, +) -> Result<(), LoggerError> { + let env_filter = EnvFilter::try_from_default_env() + .unwrap_or_else(|_| EnvFilter::new(settings.level.to_tracing_level().to_string())); + + // Setup with or without OTEL based on configuration + if let Some(endpoint) = &settings.export_to { + let otel_layer = create_otel_layer(endpoint, session_id, machine_id)?; + + if settings.stdout { + let format_layer = tracing_subscriber::fmt::layer() + .with_writer(std::io::stdout) + .with_target(true) + .with_level(true); + + if settings.format == LogFormat::Json { + tracing_subscriber::registry() + .with(otel_layer) + .with(env_filter) + .with(format_layer.json()) + .init(); + } else { + tracing_subscriber::registry() + .with(otel_layer) + .with(env_filter) + .with(format_layer.compact()) + .init(); + } + } else { + let file_appender = create_rolling_file_appender(&settings.log_file_date_format); + let format_layer = tracing_subscriber::fmt::layer() + .with_writer(file_appender) + .with_target(true) + .with_level(true); + + if settings.format == LogFormat::Json { + tracing_subscriber::registry() + .with(otel_layer) + .with(env_filter) + .with(format_layer.json()) + .init(); + } else { + tracing_subscriber::registry() + .with(otel_layer) + .with(env_filter) + .with(format_layer.compact()) + .init(); + } + } + } else { + // No OTEL export + if settings.stdout { + let format_layer = tracing_subscriber::fmt::layer() + .with_writer(std::io::stdout) + .with_target(true) + .with_level(true); + + if settings.format == LogFormat::Json { + tracing_subscriber::registry() + .with(env_filter) + .with(format_layer.json()) + .init(); + } else { + tracing_subscriber::registry() + .with(env_filter) + .with(format_layer.compact()) + .init(); + } + } else { + let file_appender = create_rolling_file_appender(&settings.log_file_date_format); + let format_layer = tracing_subscriber::fmt::layer() + .with_writer(file_appender) + .with_target(true) + .with_level(true); + + if settings.format == LogFormat::Json { + tracing_subscriber::registry() + .with(env_filter) + .with(format_layer.json()) + .init(); + } else { + tracing_subscriber::registry() + .with(env_filter) + .with(format_layer.compact()) + .init(); + } + } } - fn flush(&self) { - self.inner.flush() + Ok(()) +} + +fn setup_legacy_format( + settings: &LoggerSettings, + _session_id: &str, + _machine_id: &str, + custom_fields: CustomFields, +) -> Result<(), LoggerError> { + let env_filter = EnvFilter::try_from_default_env() + .unwrap_or_else(|_| EnvFilter::new(settings.level.to_tracing_level().to_string())); + + // Setup with or without OTEL based on configuration + if let Some(_endpoint) = &settings.export_to { + if settings.stdout { + let legacy_layer = LegacyFormatLayer::new( + std::io::stdout, + settings.format.clone(), + settings.include_session_id, + custom_fields, + ); + + tracing_subscriber::registry() + .with(env_filter) + .with(legacy_layer) + .init(); + } else { + let file_appender = create_rolling_file_appender(&settings.log_file_date_format); + let legacy_layer = LegacyFormatLayer::new( + file_appender, + 
settings.format.clone(), + settings.include_session_id, + custom_fields, + ); + + tracing_subscriber::registry() + .with(env_filter) + .with(legacy_layer) + .init(); + } + } else { + // No OTEL export + if settings.stdout { + let legacy_layer = LegacyFormatLayer::new( + std::io::stdout, + settings.format.clone(), + settings.include_session_id, + custom_fields.clone(), + ); + + tracing_subscriber::registry() + .with(env_filter) + .with(legacy_layer) + .init(); + } else { + let file_appender = create_rolling_file_appender(&settings.log_file_date_format); + let legacy_layer = LegacyFormatLayer::new( + file_appender, + settings.format.clone(), + settings.include_session_id, + custom_fields, + ); + + tracing_subscriber::registry() + .with(env_filter) + .with(legacy_layer) + .init(); + } } + + Ok(()) } diff --git a/apps/framework-cli/src/cli/processing_coordinator.rs b/apps/framework-cli/src/cli/processing_coordinator.rs index e865eef358..195ba7e8e5 100644 --- a/apps/framework-cli/src/cli/processing_coordinator.rs +++ b/apps/framework-cli/src/cli/processing_coordinator.rs @@ -63,9 +63,9 @@ impl ProcessingCoordinator { /// // Guard drops here, releasing write lock /// ``` pub async fn begin_processing(&self) -> ProcessingGuard { - log::debug!("[ProcessingCoordinator] Acquiring write lock for processing"); + tracing::debug!("[ProcessingCoordinator] Acquiring write lock for processing"); let write_guard = self.lock.clone().write_owned().await; - log::debug!("[ProcessingCoordinator] Write lock acquired, processing started"); + tracing::debug!("[ProcessingCoordinator] Write lock acquired, processing started"); ProcessingGuard { _write_guard: write_guard, @@ -86,9 +86,9 @@ impl ProcessingCoordinator { /// // Now safe to read from Redis, ClickHouse, etc. /// ``` pub async fn wait_for_stable_state(&self) { - log::trace!("[ProcessingCoordinator] Waiting for stable state (acquiring read lock)"); + tracing::trace!("[ProcessingCoordinator] Waiting for stable state (acquiring read lock)"); let _read_guard = self.lock.read().await; - log::trace!("[ProcessingCoordinator] State is stable (read lock acquired)"); + tracing::trace!("[ProcessingCoordinator] State is stable (read lock acquired)"); // Read lock is dropped here, allowing processing to proceed if needed } } @@ -109,7 +109,7 @@ pub struct ProcessingGuard { impl Drop for ProcessingGuard { fn drop(&mut self) { - log::debug!("[ProcessingCoordinator] Processing complete, releasing write lock"); + tracing::debug!("[ProcessingCoordinator] Processing complete, releasing write lock"); // Write guard drops automatically, releasing the lock } } diff --git a/apps/framework-cli/src/cli/routines/build.rs b/apps/framework-cli/src/cli/routines/build.rs index 2d94270740..4109d9ea06 100644 --- a/apps/framework-cli/src/cli/routines/build.rs +++ b/apps/framework-cli/src/cli/routines/build.rs @@ -26,21 +26,21 @@ /// } /// ``` use chrono::Local; -use log::{debug, error, info}; use std::fs; use std::path::Path; use std::path::PathBuf; use std::process::Command; +use tracing::{debug, error, info}; use crate::framework::languages::SupportedLanguages; use crate::project::Project; use crate::project::ProjectFileError; use crate::utilities::constants::LIB_DIR; use crate::utilities::constants::PACKAGE_JSON; +use crate::utilities::constants::PROJECT_CONFIG_FILE; use crate::utilities::constants::REQUIREMENTS_TXT; use crate::utilities::constants::SETUP_PY; use crate::utilities::constants::TSCONFIG_JSON; -use crate::utilities::constants::{APP_DIR, PROJECT_CONFIG_FILE}; use 
crate::utilities::package_managers::{detect_package_manager, get_lock_file_path}; use crate::utilities::system; use crate::utilities::system::copy_directory; @@ -192,10 +192,20 @@ pub fn build_package(project: &Project) -> Result { // Files to include in the package let files_to_copy = match project.language { SupportedLanguages::Typescript => { - vec![APP_DIR, PROJECT_CONFIG_FILE, PACKAGE_JSON, TSCONFIG_JSON] + vec![ + &project.source_dir, + PROJECT_CONFIG_FILE, + PACKAGE_JSON, + TSCONFIG_JSON, + ] } SupportedLanguages::Python => { - vec![APP_DIR, PROJECT_CONFIG_FILE, REQUIREMENTS_TXT, SETUP_PY] + vec![ + &project.source_dir, + PROJECT_CONFIG_FILE, + REQUIREMENTS_TXT, + SETUP_PY, + ] } }; diff --git a/apps/framework-cli/src/cli/routines/clean.rs b/apps/framework-cli/src/cli/routines/clean.rs index 540c676568..3bfdd472c6 100644 --- a/apps/framework-cli/src/cli/routines/clean.rs +++ b/apps/framework-cli/src/cli/routines/clean.rs @@ -1,6 +1,6 @@ use crate::utilities::docker::DockerClient; use crate::{cli::display::Message, cli::settings::Settings, project::Project}; -use log::info; +use tracing::info; use super::util::ensure_docker_running; use super::{RoutineFailure, RoutineSuccess}; diff --git a/apps/framework-cli/src/cli/routines/code_generation.rs b/apps/framework-cli/src/cli/routines/code_generation.rs index 0d0cbe9c3a..e5f4e0a8d5 100644 --- a/apps/framework-cli/src/cli/routines/code_generation.rs +++ b/apps/framework-cli/src/cli/routines/code_generation.rs @@ -7,19 +7,19 @@ use crate::framework::core::partial_infrastructure_map::LifeCycle; use crate::framework::languages::SupportedLanguages; use crate::framework::python::generate::tables_to_python; use crate::framework::typescript::generate::tables_to_typescript; -use crate::infrastructure::olap::clickhouse::ConfiguredDBClient; +use crate::infrastructure::olap::clickhouse::{create_client, ConfiguredDBClient}; use crate::infrastructure::olap::OlapOperations; use crate::project::Project; use crate::utilities::constants::{ - APP_DIR, PYTHON_EXTERNAL_FILE, PYTHON_MAIN_FILE, TYPESCRIPT_EXTERNAL_FILE, TYPESCRIPT_MAIN_FILE, + PYTHON_EXTERNAL_FILE, PYTHON_MAIN_FILE, TYPESCRIPT_EXTERNAL_FILE, TYPESCRIPT_MAIN_FILE, }; use crate::utilities::git::create_code_generation_commit; -use log::debug; -use reqwest::Url; +use clickhouse::Client; use std::borrow::Cow; use std::env; use std::io::Write; use std::path::Path; +use tracing::debug; pub fn prompt_user_for_remote_ch_http() -> Result { let base = prompt_user( @@ -59,90 +59,51 @@ fn should_be_externally_managed(table: &Table) -> bool { pub async fn create_client_and_db( remote_url: &str, ) -> Result<(ConfiguredDBClient, String), RoutineFailure> { - let mut url = Url::parse(remote_url).map_err(|e| { - RoutineFailure::error(Message::new( - "Invalid URL".to_string(), - format!("Failed to parse remote_url '{remote_url}': {e}"), - )) + use crate::infrastructure::olap::clickhouse::config::parse_clickhouse_connection_string_with_metadata; + + // Parse the connection string with metadata + let parsed = parse_clickhouse_connection_string_with_metadata(remote_url).map_err(|e| { + RoutineFailure::new( + Message::new( + "Invalid URL".to_string(), + format!("Failed to parse ClickHouse URL '{remote_url}'"), + ), + e, + ) })?; - if url.scheme() == "clickhouse" { + // Show user-facing message if native protocol was converted + if parsed.was_native_protocol { debug!("Only HTTP(s) supported. 
Transforming native protocol connection string."); - let is_secure = match (url.host_str(), url.port()) { - (_, Some(9000)) => false, - (_, Some(9440)) => true, - (Some(host), _) if host == "localhost" || host == "127.0.0.1" => false, - _ => true, - }; - let (new_port, new_scheme) = if is_secure { - (8443, "https") - } else { - (8123, "http") - }; - url = Url::parse(&remote_url.replacen("clickhouse", new_scheme, 1)).unwrap(); - url.set_port(Some(new_port)).unwrap(); - - let path_segments = url.path().split('/').collect::>(); - if path_segments.len() == 2 && path_segments[0].is_empty() { - let database = path_segments[1].to_string(); - url.set_path(""); - url.query_pairs_mut().append_pair("database", &database); - }; - - let display_url = if url.password().is_some() { - let mut cloned = url.clone(); - cloned.set_password(Some("******")).unwrap(); - Cow::Owned(cloned) - } else { - Cow::Borrowed(&url) - }; show_message!( MessageType::Highlight, Message { action: "Protocol".to_string(), - details: format!("native protocol detected. Converting to HTTP(s): {display_url}"), + details: format!( + "native protocol detected. Converting to HTTP(s): {}", + parsed.display_url + ), } ); } - let mut client = clickhouse::Client::default().with_url(remote_url); - let url_username = url.username(); - let url_username = if !url_username.is_empty() { - url_username.to_string() - } else { - match url.query_pairs().find(|(key, _)| key == "user") { - None => String::new(), - Some((_, v)) => v.to_string(), - } - }; - if !url_username.is_empty() { - client = client - .with_user(percent_encoding::percent_decode_str(&url_username).decode_utf8_lossy()) - } - if let Some(password) = url.password() { - client = client - .with_password(percent_encoding::percent_decode_str(password).decode_utf8_lossy()); - } - - let url_db = url - .query_pairs() - .filter_map(|(k, v)| { - if k == "database" { - Some(v.to_string()) - } else { - None - } - }) - .last(); - - let client = ConfiguredDBClient { - client, - config: Default::default(), - }; + let mut config = parsed.config; + + // If database wasn't explicitly specified in URL, query the server for the current database + let db_name = if !parsed.database_was_explicit { + // create_client(config) calls `with_database(config.database)` when we're not sure which DB is the real default + let client = Client::default() + .with_url(format!( + "{}://{}:{}", + if config.use_ssl { "https" } else { "http" }, + config.host, + config.host_port + )) + .with_user(config.user.to_string()) + .with_password(config.password.to_string()); - let db = match url_db { - None => client - .client + // No database was specified in URL, query the server + client .query("select database()") .fetch_one::() .await @@ -151,25 +112,32 @@ pub async fn create_client_and_db( Message::new("Failure".to_string(), "fetching database".to_string()), e, ) - })?, - Some(db) => db, + })? 
+ } else { + config.db_name.clone() }; - Ok((client, db)) + // Update config with detected database name if it changed + if db_name != config.db_name { + config.db_name = db_name.clone(); + } + + Ok((create_client(config), db_name)) } fn write_external_models_file( language: SupportedLanguages, tables: &[Table], file_path: Option<&str>, + source_dir: &str, ) -> Result<(), RoutineFailure> { let file = match (language, file_path) { (_, Some(path)) => Cow::Borrowed(path), (SupportedLanguages::Typescript, None) => { - Cow::Owned(format!("{APP_DIR}/{TYPESCRIPT_EXTERNAL_FILE}")) + Cow::Owned(format!("{source_dir}/{TYPESCRIPT_EXTERNAL_FILE}")) } (SupportedLanguages::Python, None) => { - Cow::Owned(format!("{APP_DIR}/{PYTHON_EXTERNAL_FILE}")) + Cow::Owned(format!("{source_dir}/{PYTHON_EXTERNAL_FILE}")) } }; match language { @@ -255,6 +223,8 @@ pub async fn db_to_dmv2(remote_url: &str, dir_path: &Path) -> Result<(), Routine e, ) })?; + // TODO: Also call list_sql_resources to fetch Views/MVs and generate code for them. + // Currently we only generate code for Tables. let (tables, unsupported) = client.list_tables(&db, &project).await.map_err(|e| { RoutineFailure::new( Message::new("Failure".to_string(), "listing tables".to_string()), @@ -292,7 +262,7 @@ pub async fn db_to_dmv2(remote_url: &str, dir_path: &Path) -> Result<(), Routine .create(true) .write(true) .truncate(true) - .open(format!("{APP_DIR}/{TYPESCRIPT_EXTERNAL_FILE}")) + .open(format!("{}/{TYPESCRIPT_EXTERNAL_FILE}", project.source_dir)) .map_err(|e| { RoutineFailure::new( Message::new( @@ -311,7 +281,7 @@ pub async fn db_to_dmv2(remote_url: &str, dir_path: &Path) -> Result<(), Routine e, ) })?; - let main_path = format!("{APP_DIR}/{TYPESCRIPT_MAIN_FILE}"); + let main_path = format!("{}/{TYPESCRIPT_MAIN_FILE}", project.source_dir); let import_stmt = "import \"./externalModels\";"; let needs_import = match std::fs::read_to_string(&main_path) { Ok(contents) => !contents.contains(import_stmt), @@ -346,7 +316,7 @@ pub async fn db_to_dmv2(remote_url: &str, dir_path: &Path) -> Result<(), Routine let table_definitions = tables_to_typescript(&managed, None); let mut file = std::fs::OpenOptions::new() .append(true) - .open(format!("{APP_DIR}/{TYPESCRIPT_MAIN_FILE}")) + .open(format!("{}/{TYPESCRIPT_MAIN_FILE}", project.source_dir)) .map_err(|e| { RoutineFailure::new( Message::new( @@ -376,7 +346,7 @@ pub async fn db_to_dmv2(remote_url: &str, dir_path: &Path) -> Result<(), Routine .create(true) .write(true) .truncate(true) - .open(format!("{APP_DIR}/{PYTHON_EXTERNAL_FILE}")) + .open(format!("{}/{PYTHON_EXTERNAL_FILE}", project.source_dir)) .map_err(|e| { RoutineFailure::new( Message::new( @@ -395,7 +365,7 @@ pub async fn db_to_dmv2(remote_url: &str, dir_path: &Path) -> Result<(), Routine e, ) })?; - let main_path = format!("{APP_DIR}/{PYTHON_MAIN_FILE}"); + let main_path = format!("{}/{PYTHON_MAIN_FILE}", project.source_dir); let import_stmt = "from .external_models import *"; let needs_import = match std::fs::read_to_string(&main_path) { Ok(contents) => !contents.contains(import_stmt), @@ -429,7 +399,7 @@ pub async fn db_to_dmv2(remote_url: &str, dir_path: &Path) -> Result<(), Routine let table_definitions = tables_to_python(&managed, None); let mut file = std::fs::OpenOptions::new() .append(true) - .open(format!("{APP_DIR}/{PYTHON_MAIN_FILE}")) + .open(format!("{}/{PYTHON_MAIN_FILE}", project.source_dir)) .map_err(|e| { RoutineFailure::new( Message::new( @@ -534,7 +504,12 @@ pub async fn db_pull( // Keep a stable ordering for deterministic 
output tables_for_external_file.sort_by(|a, b| a.name.cmp(&b.name)); - write_external_models_file(project.language, &tables_for_external_file, file_path)?; + write_external_models_file( + project.language, + &tables_for_external_file, + file_path, + &project.source_dir, + )?; match create_code_generation_commit( ".".as_ref(), diff --git a/apps/framework-cli/src/cli/routines/dev.rs b/apps/framework-cli/src/cli/routines/dev.rs index f602a35057..c62b271155 100644 --- a/apps/framework-cli/src/cli/routines/dev.rs +++ b/apps/framework-cli/src/cli/routines/dev.rs @@ -17,7 +17,7 @@ pub fn run_local_infrastructure( docker_client: &DockerClient, ) -> anyhow::Result<()> { // Debug log to check load_infra value at runtime - log::info!( + tracing::info!( "[moose] DEBUG: load_infra from config: {:?}, should_load_infra(): {}", project.load_infra, project.should_load_infra() diff --git a/apps/framework-cli/src/cli/routines/docker_packager.rs b/apps/framework-cli/src/cli/routines/docker_packager.rs index 075c80121c..7ecbda7c1d 100644 --- a/apps/framework-cli/src/cli/routines/docker_packager.rs +++ b/apps/framework-cli/src/cli/routines/docker_packager.rs @@ -3,8 +3,8 @@ use crate::cli::display::with_spinner_completion; use crate::cli::routines::util::ensure_docker_running; use crate::framework::languages::SupportedLanguages; use crate::utilities::constants::{ - APP_DIR, OLD_PROJECT_CONFIG_FILE, PACKAGE_JSON, PROJECT_CONFIG_FILE, REQUIREMENTS_TXT, - SETUP_PY, TSCONFIG_JSON, + OLD_PROJECT_CONFIG_FILE, PACKAGE_JSON, PROJECT_CONFIG_FILE, REQUIREMENTS_TXT, SETUP_PY, + TSCONFIG_JSON, }; use crate::utilities::docker::DockerClient; use crate::utilities::nodejs_version::determine_node_version_from_package_json; @@ -12,10 +12,10 @@ use crate::utilities::package_managers::get_lock_file_path; use crate::utilities::{constants, system}; use crate::{cli::display::Message, project::Project}; -use log::{debug, error, info}; use serde_json::Value as JsonValue; use std::fs; use std::path::{Path, PathBuf}; +use tracing::{debug, error, info}; #[derive(Debug, Clone)] struct PackageInfo { @@ -588,7 +588,7 @@ pub fn build_dockerfile( // Copy app & etc to packager directory let project_root_path = project.project_location.clone(); let items_to_copy = vec![ - APP_DIR, + &project.source_dir, PACKAGE_JSON, SETUP_PY, REQUIREMENTS_TXT, diff --git a/apps/framework-cli/src/cli/routines/format_query.rs b/apps/framework-cli/src/cli/routines/format_query.rs new file mode 100644 index 0000000000..f113f7564c --- /dev/null +++ b/apps/framework-cli/src/cli/routines/format_query.rs @@ -0,0 +1,318 @@ +//! Module for formatting SQL queries as code literals. +//! +//! Supports formatting SQL queries as Python raw strings or TypeScript template literals +//! for easy copy-pasting into application code. + +use crate::cli::display::Message; +use crate::cli::routines::RoutineFailure; +use sqlparser::ast::Statement; +use sqlparser::dialect::ClickHouseDialect; +use sqlparser::parser::Parser; + +/// Supported languages for code formatting +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum CodeLanguage { + Python, + TypeScript, +} + +impl CodeLanguage { + /// Parse language string into CodeLanguage enum + pub fn from_str(s: &str) -> Result { + match s.to_lowercase().as_str() { + "python" | "py" => Ok(CodeLanguage::Python), + "typescript" | "ts" => Ok(CodeLanguage::TypeScript), + _ => Err(RoutineFailure::error(Message::new( + "Format Query".to_string(), + format!( + "Unsupported language: '{}'. 
Supported: python, typescript", + s + ), + ))), + } + } +} + +/// Parse SQL using ClickHouse dialect +fn parse_sql(sql: &str) -> Result, RoutineFailure> { + let dialect = ClickHouseDialect {}; + Parser::parse_sql(&dialect, sql).map_err(|e| { + RoutineFailure::error(Message::new( + "SQL Parsing".to_string(), + format!("Invalid SQL syntax: {}", e), + )) + }) +} + +/// Validate SQL syntax using sqlparser. +/// +/// Parses the SQL query to ensure it's syntactically valid before formatting or execution. +/// +/// # Arguments +/// +/// * `sql` - The SQL query string to validate +/// +/// # Returns +/// +/// * `Result<(), RoutineFailure>` - Ok if valid, error with helpful message if invalid +pub fn validate_sql(sql: &str) -> Result<(), RoutineFailure> { + parse_sql(sql)?; + Ok(()) +} + +/// Prettify SQL query using sqlparser's pretty printing. +/// +/// Parses the SQL and formats it with proper indentation and line breaks. +/// +/// # Arguments +/// +/// * `sql` - The SQL query string to prettify +/// +/// # Returns +/// +/// * `Result` - Prettified SQL string or error +fn prettify_sql(sql: &str) -> Result { + let statements = parse_sql(sql)?; + + // Format all statements with pretty printing + let formatted: Vec = statements + .iter() + .map(|stmt| format!("{:#}", stmt)) + .collect(); + + Ok(formatted.join(";\n")) +} + +/// Format SQL query as a code literal for the specified language. +/// +/// # Arguments +/// +/// * `sql` - The SQL query string to format +/// * `language` - Target language (Python or TypeScript) +/// * `prettify` - Whether to prettify SQL before formatting +/// +/// # Returns +/// +/// * `Result` - Formatted code literal or error +pub fn format_as_code( + sql: &str, + language: CodeLanguage, + prettify: bool, +) -> Result { + let sql_to_format = if prettify { + prettify_sql(sql)? 
+ } else { + sql.to_string() + }; + + let formatted = match language { + CodeLanguage::Python => format_python(&sql_to_format), + CodeLanguage::TypeScript => format_typescript(&sql_to_format), + }; + + Ok(formatted) +} + +/// Format SQL as Python raw triple-quoted string +fn format_python(sql: &str) -> String { + format!("r\"\"\"\n{}\n\"\"\"", sql.trim()) +} + +/// Format SQL as TypeScript template literal +fn format_typescript(sql: &str) -> String { + format!("`\n{}\n`", sql.trim()) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_language_from_str() { + assert_eq!( + CodeLanguage::from_str("python").unwrap(), + CodeLanguage::Python + ); + assert_eq!(CodeLanguage::from_str("py").unwrap(), CodeLanguage::Python); + assert_eq!( + CodeLanguage::from_str("typescript").unwrap(), + CodeLanguage::TypeScript + ); + assert_eq!( + CodeLanguage::from_str("ts").unwrap(), + CodeLanguage::TypeScript + ); + assert!(CodeLanguage::from_str("java").is_err()); + } + + #[test] + fn test_format_python() { + let sql = "SELECT * FROM users\nWHERE id = 1"; + let result = format_python(sql); + assert_eq!(result, "r\"\"\"\nSELECT * FROM users\nWHERE id = 1\n\"\"\""); + } + + #[test] + fn test_format_python_with_regex() { + let sql = r"SELECT * FROM users WHERE email REGEXP '[a-z]+'"; + let result = format_python(sql); + assert!(result.starts_with("r\"\"\"")); + assert!(result.contains(r"REGEXP '[a-z]+'")); + } + + #[test] + fn test_format_typescript() { + let sql = "SELECT * FROM users\nWHERE id = 1"; + let result = format_typescript(sql); + assert_eq!(result, "`\nSELECT * FROM users\nWHERE id = 1\n`"); + } + + #[test] + fn test_format_as_code_python() { + let sql = "SELECT 1"; + let result = format_as_code(sql, CodeLanguage::Python, false).unwrap(); + assert_eq!(result, "r\"\"\"\nSELECT 1\n\"\"\""); + } + + #[test] + fn test_format_as_code_typescript() { + let sql = "SELECT 1"; + let result = format_as_code(sql, CodeLanguage::TypeScript, false).unwrap(); + assert_eq!(result, "`\nSELECT 1\n`"); + } + + #[test] + fn test_format_python_multiline_complex() { + let sql = r#"SELECT + user_id, + email, + created_at +FROM users +WHERE email REGEXP '^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$' + AND status = 'active' +ORDER BY created_at DESC"#; + let result = format_python(sql); + assert!(result.starts_with("r\"\"\"")); + assert!(result.ends_with("\"\"\"")); + assert!(result.contains("REGEXP")); + assert!(result.contains("ORDER BY")); + // Verify backslashes are preserved as-is in raw string + assert!(result.contains(r"[a-zA-Z0-9._%+-]+")); + } + + #[test] + fn test_format_python_complex_regex_patterns() { + // Test various regex special characters + let sql = r"SELECT * FROM logs WHERE message REGEXP '\\d{4}-\\d{2}-\\d{2}\\s+\\w+'"; + let result = format_python(sql); + assert!(result.contains(r"\\d{4}-\\d{2}-\\d{2}\\s+\\w+")); + + // Test with character classes and quantifiers + let sql2 = r"SELECT * FROM data WHERE field REGEXP '[A-Z]{3,5}\-\d+'"; + let result2 = format_python(sql2); + assert!(result2.contains(r"[A-Z]{3,5}\-\d+")); + } + + #[test] + fn test_format_typescript_multiline_complex() { + let sql = r#"SELECT + order_id, + customer_email, + total_amount +FROM orders +WHERE customer_email REGEXP '[a-z0-9._%+-]+@[a-z0-9.-]+\.[a-z]{2,}' + AND total_amount > 100 +LIMIT 50"#; + let result = format_typescript(sql); + assert!(result.starts_with("`")); + assert!(result.ends_with("`")); + assert!(result.contains("REGEXP")); + assert!(result.contains("LIMIT 50")); + } + + #[test] + fn 
test_format_preserves_indentation() { + let sql = "SELECT *\n FROM users\n WHERE id = 1"; + let python_result = format_python(sql); + let typescript_result = format_typescript(sql); + + // Both should preserve the indentation + assert!(python_result.contains(" FROM users")); + assert!(python_result.contains(" WHERE id = 1")); + assert!(typescript_result.contains(" FROM users")); + assert!(typescript_result.contains(" WHERE id = 1")); + } + + #[test] + fn test_format_python_with_quotes_and_backslashes() { + // SQL with single quotes and backslashes + let sql = r"SELECT * FROM data WHERE pattern REGEXP '\\b(foo|bar)\\b' AND name = 'test'"; + let result = format_python(sql); + // Raw strings should preserve everything as-is + assert!(result.contains(r"\\b(foo|bar)\\b")); + assert!(result.contains("name = 'test'")); + } + + #[test] + fn test_prettify_sql_basic() { + let sql = "SELECT id, name FROM users WHERE active = 1 ORDER BY name"; + let result = prettify_sql(sql).unwrap(); + + assert!(result.contains("SELECT")); + assert!(result.contains("FROM")); + assert!(result.contains("users")); + assert!(result.contains("WHERE")); + // Should have line breaks with sqlparser formatting + assert!(result.contains('\n')); + } + + #[test] + fn test_prettify_sql_preserves_values() { + let sql = "SELECT * FROM users WHERE email = 'test@example.com'"; + let result = prettify_sql(sql).unwrap(); + + // Should preserve the email value + assert!(result.contains("test@example.com")); + } + + #[test] + fn test_format_as_code_with_prettify() { + let sql = "SELECT id, name FROM users WHERE active = 1"; + + // With prettify + let result = format_as_code(sql, CodeLanguage::Python, true).unwrap(); + assert!(result.starts_with("r\"\"\"")); + assert!(result.contains('\n')); + assert!(result.contains("SELECT")); + + // Without prettify + let result_no_prettify = format_as_code(sql, CodeLanguage::Python, false).unwrap(); + assert!(result_no_prettify.starts_with("r\"\"\"")); + assert!(result_no_prettify.contains("SELECT id, name FROM users")); + } + + #[test] + fn test_prettify_with_complex_query() { + let sql = "SELECT u.id, u.name, o.total FROM users u LEFT JOIN orders o ON u.id = o.user_id WHERE u.active = 1 AND o.total > 100 ORDER BY o.total DESC LIMIT 10"; + let result = prettify_sql(sql).unwrap(); + + assert!(result.contains("SELECT")); + assert!(result.contains("FROM")); + assert!(result.contains("users")); + assert!(result.contains("JOIN")); + assert!(result.contains("WHERE")); + assert!(result.contains("LIMIT")); + } + + #[test] + fn test_validate_sql_valid() { + let sql = "SELECT * FROM users WHERE id = 1"; + assert!(validate_sql(sql).is_ok()); + } + + #[test] + fn test_validate_sql_invalid() { + let sql = "INVALID SQL SYNTAX ;;; NOT VALID"; + assert!(validate_sql(sql).is_err()); + } +} diff --git a/apps/framework-cli/src/cli/routines/kafka_pull.rs b/apps/framework-cli/src/cli/routines/kafka_pull.rs index 535f8f947a..c7e9c863bb 100644 --- a/apps/framework-cli/src/cli/routines/kafka_pull.rs +++ b/apps/framework-cli/src/cli/routines/kafka_pull.rs @@ -10,7 +10,6 @@ use crate::framework::typescript::generate::sanitize_typescript_identifier; use crate::infrastructure::stream::kafka::client::fetch_topics; use crate::project::Project; use globset::{Glob, GlobMatcher}; -use log::{info, warn}; use schema_registry_client::rest::apis::Error as SchemaRegistryError; use schema_registry_client::rest::schema_registry_client::{ Client as SrClientTrait, SchemaRegistryClient, @@ -19,6 +18,7 @@ use serde_json::Value; use std::fs; 
use std::path::Path; use std::str::FromStr; +use tracing::{info, warn}; fn build_matcher(s: &str) -> Result { let matcher = Glob::new(s) diff --git a/apps/framework-cli/src/cli/routines/migrate.rs b/apps/framework-cli/src/cli/routines/migrate.rs index 5f5bb4da96..afa256c69d 100644 --- a/apps/framework-cli/src/cli/routines/migrate.rs +++ b/apps/framework-cli/src/cli/routines/migrate.rs @@ -6,7 +6,7 @@ use crate::framework::core::infrastructure::table::Table; use crate::framework::core::infrastructure_map::InfrastructureMap; use crate::framework::core::migration_plan::MigrationPlan; use crate::framework::core::state_storage::{StateStorage, StateStorageBuilder}; -use crate::infrastructure::olap::clickhouse::config::ClickHouseConfig; +use crate::infrastructure::olap::clickhouse::config::{ClickHouseConfig, ClusterConfig}; use crate::infrastructure::olap::clickhouse::IgnorableOperation; use crate::infrastructure::olap::clickhouse::{ check_ready, create_client, ConfiguredDBClient, SerializableOlapOperation, @@ -196,120 +196,226 @@ fn report_drift(drift: &DriftStatus) { } } -/// Validates that all table databases specified in operations are configured -fn validate_table_databases( +/// Validates that all table databases and clusters specified in operations are configured +fn validate_table_databases_and_clusters( operations: &[SerializableOlapOperation], primary_database: &str, additional_databases: &[String], + clusters: &Option>, ) -> Result<()> { let mut invalid_tables = Vec::new(); - - // Helper to validate a database option - let mut validate_db = |db_opt: &Option, table_name: &str| { + let mut invalid_clusters = Vec::new(); + + // Get configured cluster names + let cluster_names: Vec = clusters + .as_ref() + .map(|cs| cs.iter().map(|c| c.name.clone()).collect()) + .unwrap_or_default(); + + tracing::info!("Configured cluster names: {:?}", cluster_names); + + // Helper to validate database and cluster options + let mut validate = |db_opt: &Option, cluster_opt: &Option, table_name: &str| { + tracing::info!( + "Validating table '{}' with cluster: {:?}", + table_name, + cluster_opt + ); + // Validate database if let Some(db) = db_opt { if db != primary_database && !additional_databases.contains(db) { invalid_tables.push((table_name.to_string(), db.clone())); } } + // Validate cluster + if let Some(cluster) = cluster_opt { + tracing::info!( + "Checking if cluster '{}' is in {:?}", + cluster, + cluster_names + ); + // Fail if cluster is not in the configured list (or if list is empty) + if cluster_names.is_empty() || !cluster_names.contains(cluster) { + tracing::info!("Cluster '{}' not found in configured clusters!", cluster); + invalid_clusters.push((table_name.to_string(), cluster.clone())); + } + } }; for operation in operations { match operation { SerializableOlapOperation::CreateTable { table } => { - validate_db(&table.database, &table.name); + validate(&table.database, &table.cluster_name, &table.name); } - SerializableOlapOperation::DropTable { table, database } => { - validate_db(database, table); + SerializableOlapOperation::DropTable { + table, + database, + cluster_name, + } => { + validate(database, cluster_name, table); } SerializableOlapOperation::AddTableColumn { - table, database, .. + table, + database, + cluster_name, + .. } => { - validate_db(database, table); + validate(database, cluster_name, table); } SerializableOlapOperation::DropTableColumn { - table, database, .. + table, + database, + cluster_name, + .. 
} => { - validate_db(database, table); + validate(database, cluster_name, table); } SerializableOlapOperation::ModifyTableColumn { - table, database, .. + table, + database, + cluster_name, + .. } => { - validate_db(database, table); + validate(database, cluster_name, table); } SerializableOlapOperation::RenameTableColumn { - table, database, .. + table, + database, + cluster_name, + .. } => { - validate_db(database, table); + validate(database, cluster_name, table); } SerializableOlapOperation::ModifyTableSettings { - table, database, .. + table, + database, + cluster_name, + .. } => { - validate_db(database, table); + validate(database, cluster_name, table); } SerializableOlapOperation::ModifyTableTtl { - table, database, .. + table, + database, + cluster_name, + .. } => { - validate_db(database, table); + validate(database, cluster_name, table); } SerializableOlapOperation::AddTableIndex { - table, database, .. + table, + database, + cluster_name, + .. } => { - validate_db(database, table); + validate(database, cluster_name, table); } SerializableOlapOperation::DropTableIndex { - table, database, .. + table, + database, + cluster_name, + .. } => { - validate_db(database, table); + validate(database, cluster_name, table); } SerializableOlapOperation::ModifySampleBy { - table, database, .. + table, + database, + cluster_name, + .. } => { - validate_db(database, table); + validate(database, cluster_name, table); } SerializableOlapOperation::RemoveSampleBy { - table, database, .. + table, + database, + cluster_name, + .. } => { - validate_db(database, table); + validate(database, cluster_name, table); } SerializableOlapOperation::RawSql { .. } => { - // RawSql doesn't reference specific tables/databases, skip validation + // RawSql doesn't reference specific tables/databases/clusters, skip validation } } } - if !invalid_tables.is_empty() { - let mut error_message = String::from( - "One or more tables specify databases that are not configured in moose.config.toml:\n\n" - ); + // Build error message if we found any issues + let has_errors = !invalid_tables.is_empty() || !invalid_clusters.is_empty(); + if has_errors { + let mut error_message = String::new(); - for (table_name, database) in &invalid_tables { - error_message.push_str(&format!( - " • Table '{}' specifies database '{}'\n", - table_name, database - )); - } + // Report database errors + if !invalid_tables.is_empty() { + error_message.push_str( + "One or more tables specify databases that are not configured in moose.config.toml:\n\n", + ); - error_message - .push_str("\nTo fix this, add the missing database(s) to your moose.config.toml:\n\n"); - error_message.push_str("[clickhouse_config]\n"); - error_message.push_str(&format!("db_name = \"{}\"\n", primary_database)); - error_message.push_str("additional_databases = ["); + for (table_name, database) in &invalid_tables { + error_message.push_str(&format!( + " • Table '{}' specifies database '{}'\n", + table_name, database + )); + } - let mut all_databases: Vec = additional_databases.to_vec(); - for (_, db) in &invalid_tables { - if !all_databases.contains(db) { - all_databases.push(db.clone()); + error_message.push_str( + "\nTo fix this, add the missing database(s) to your moose.config.toml:\n\n", + ); + error_message.push_str("[clickhouse_config]\n"); + error_message.push_str(&format!("db_name = \"{}\"\n", primary_database)); + error_message.push_str("additional_databases = ["); + + let mut all_databases: Vec = additional_databases.to_vec(); + for (_, db) in &invalid_tables { + if 
!all_databases.contains(db) { + all_databases.push(db.clone()); + } } + all_databases.sort(); + + let db_list = all_databases + .iter() + .map(|db| format!("\"{}\"", db)) + .collect::>() + .join(", "); + error_message.push_str(&db_list); + error_message.push_str("]\n"); } - all_databases.sort(); - let db_list = all_databases - .iter() - .map(|db| format!("\"{}\"", db)) - .collect::>() - .join(", "); - error_message.push_str(&db_list); - error_message.push_str("]\n"); + // Report cluster errors + if !invalid_clusters.is_empty() { + if !invalid_tables.is_empty() { + error_message.push('\n'); + } + + error_message.push_str( + "One or more tables specify clusters that are not configured in moose.config.toml:\n\n", + ); + + for (table_name, cluster) in &invalid_clusters { + error_message.push_str(&format!( + " • Table '{}' specifies cluster '{}'\n", + table_name, cluster + )); + } + + error_message.push_str( + "\nTo fix this, add the missing cluster(s) to your moose.config.toml:\n\n", + ); + + // Only show the missing clusters in the error message, not the already configured ones + let mut missing_clusters: Vec = invalid_clusters + .iter() + .map(|(_, cluster)| cluster.clone()) + .collect(); + missing_clusters.sort(); + missing_clusters.dedup(); + + for cluster in &missing_clusters { + error_message.push_str("[[clickhouse_config.clusters]]\n"); + error_message.push_str(&format!("name = \"{}\"\n\n", cluster)); + } + } anyhow::bail!(error_message); } @@ -341,11 +447,16 @@ async fn execute_operations( migration_plan.operations.len() ); - // Validate that all table databases are configured - validate_table_databases( + // Validate that all table databases and clusters are configured + tracing::info!( + "Validating operations against config. Clusters: {:?}", + project.clickhouse_config.clusters + ); + validate_table_databases_and_clusters( &migration_plan.operations, &project.clickhouse_config.db_name, &project.clickhouse_config.additional_databases, + &project.clickhouse_config.clusters, )?; let is_dev = !project.is_production; @@ -474,19 +585,28 @@ pub async fn execute_migration( let target_table_ids: HashSet = current_infra_map.tables.keys().cloned().collect(); + let target_sql_resource_ids: HashSet = + current_infra_map.sql_resources.keys().cloned().collect(); + let olap_client = create_client(clickhouse_config.clone()); - reconcile_with_reality(project, ¤t_infra_map, &target_table_ids, olap_client) - .await - .map_err(|e| { - RoutineFailure::new( - Message::new( - "Reconciliation".to_string(), - "Failed to reconcile state with ClickHouse reality".to_string(), - ), - anyhow::anyhow!("{:?}", e), - ) - })? + reconcile_with_reality( + project, + ¤t_infra_map, + &target_table_ids, + &target_sql_resource_ids, + olap_client, + ) + .await + .map_err(|e| { + RoutineFailure::new( + Message::new( + "Reconciliation".to_string(), + "Failed to reconcile state with ClickHouse reality".to_string(), + ), + anyhow::anyhow!("{:?}", e), + ) + })? 
} else { current_infra_map }; @@ -530,7 +650,7 @@ pub async fn execute_migration( // Always release lock explicitly before returning // This ensures cleanup happens even if any operation above failed if let Err(e) = state_storage.release_migration_lock().await { - log::warn!("Failed to release migration lock: {}", e); + tracing::warn!("Failed to release migration lock: {}", e); } result @@ -626,6 +746,7 @@ mod tests { use crate::framework::core::infrastructure::table::{Column, ColumnType, OrderBy}; use crate::framework::core::infrastructure_map::PrimitiveSignature; use crate::framework::core::partial_infrastructure_map::LifeCycle; + use crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine; /// Helper to create a minimal test table fn create_test_table(name: &str) -> Table { @@ -653,12 +774,14 @@ mod tests { primitive_type: crate::framework::core::infrastructure_map::PrimitiveTypes::DataModel, }, - engine: None, + engine: ClickhouseEngine::MergeTree, metadata: None, life_cycle: LifeCycle::FullyManaged, engine_params_hash: None, table_settings: None, table_ttl_setting: None, + cluster_name: None, + primary_key_expression: None, } } @@ -964,7 +1087,7 @@ mod tests { }]; // Primary database matches - should pass - let result = validate_table_databases(&operations, "local", &[]); + let result = validate_table_databases_and_clusters(&operations, "local", &[], &None); assert!(result.is_ok()); // Database in additional_databases - should pass @@ -973,7 +1096,12 @@ mod tests { let operations = vec![SerializableOlapOperation::CreateTable { table: table_analytics, }]; - let result = validate_table_databases(&operations, "local", &["analytics".to_string()]); + let result = validate_table_databases_and_clusters( + &operations, + "local", + &["analytics".to_string()], + &None, + ); assert!(result.is_ok()); } @@ -985,7 +1113,7 @@ mod tests { let operations = vec![SerializableOlapOperation::CreateTable { table }]; // Database not in config - should fail - let result = validate_table_databases(&operations, "local", &[]); + let result = validate_table_databases_and_clusters(&operations, "local", &[], &None); assert!(result.is_err()); let err = result.unwrap_err().to_string(); assert!(err.contains("unconfigured_db")); @@ -999,6 +1127,7 @@ mod tests { SerializableOlapOperation::DropTable { table: "test".to_string(), database: Some("bad_db".to_string()), + cluster_name: None, }, SerializableOlapOperation::AddTableColumn { table: "test".to_string(), @@ -1015,6 +1144,7 @@ mod tests { }, after_column: None, database: Some("bad_db".to_string()), + cluster_name: None, }, SerializableOlapOperation::ModifyTableColumn { table: "test".to_string(), @@ -1041,10 +1171,11 @@ mod tests { ttl: None, }, database: Some("another_bad_db".to_string()), + cluster_name: None, }, ]; - let result = validate_table_databases(&operations, "local", &[]); + let result = validate_table_databases_and_clusters(&operations, "local", &[], &None); assert!(result.is_err()); let err = result.unwrap_err().to_string(); // Should report both bad databases @@ -1060,7 +1191,108 @@ mod tests { description: "test".to_string(), }]; - let result = validate_table_databases(&operations, "local", &[]); + let result = validate_table_databases_and_clusters(&operations, "local", &[], &None); + assert!(result.is_ok()); + } + + #[test] + fn test_validate_cluster_valid() { + let mut table = create_test_table("users"); + table.cluster_name = Some("my_cluster".to_string()); + + let operations = vec![SerializableOlapOperation::CreateTable { + table: 
table.clone(), + }]; + + let clusters = Some(vec![ClusterConfig { + name: "my_cluster".to_string(), + }]); + + // Cluster is configured - should pass + let result = validate_table_databases_and_clusters(&operations, "local", &[], &clusters); assert!(result.is_ok()); } + + #[test] + fn test_validate_cluster_invalid() { + let mut table = create_test_table("users"); + table.cluster_name = Some("unconfigured_cluster".to_string()); + + let operations = vec![SerializableOlapOperation::CreateTable { table }]; + + let clusters = Some(vec![ + ClusterConfig { + name: "my_cluster".to_string(), + }, + ClusterConfig { + name: "another_cluster".to_string(), + }, + ]); + + // Cluster not in config - should fail and show available clusters + let result = validate_table_databases_and_clusters(&operations, "local", &[], &clusters); + assert!(result.is_err()); + let err = result.unwrap_err().to_string(); + assert!( + err.contains("unconfigured_cluster"), + "Error should mention the invalid cluster" + ); + assert!( + err.contains("moose.config.toml"), + "Error should reference config file" + ); + } + + #[test] + fn test_validate_cluster_no_clusters_configured() { + let mut table = create_test_table("users"); + table.cluster_name = Some("some_cluster".to_string()); + + let operations = vec![SerializableOlapOperation::CreateTable { table }]; + + // No clusters configured but table references one - should fail + let result = validate_table_databases_and_clusters(&operations, "local", &[], &None); + assert!(result.is_err()); + let err = result.unwrap_err().to_string(); + assert!(err.contains("some_cluster")); + } + + #[test] + fn test_validate_both_database_and_cluster_invalid() { + let mut table = create_test_table("users"); + table.database = Some("bad_db".to_string()); + table.cluster_name = Some("bad_cluster".to_string()); + + let operations = vec![SerializableOlapOperation::CreateTable { table }]; + + let clusters = Some(vec![ClusterConfig { + name: "good_cluster".to_string(), + }]); + + // Both database and cluster invalid - should report both errors + let result = validate_table_databases_and_clusters(&operations, "local", &[], &clusters); + assert!(result.is_err()); + let err = result.unwrap_err().to_string(); + assert!(err.contains("bad_db")); + assert!(err.contains("bad_cluster")); + } + + #[test] + fn test_validate_cluster_in_drop_table_operation() { + let operations = vec![SerializableOlapOperation::DropTable { + table: "users".to_string(), + database: None, + cluster_name: Some("unconfigured_cluster".to_string()), + }]; + + let clusters = Some(vec![ClusterConfig { + name: "my_cluster".to_string(), + }]); + + // DropTable with invalid cluster - should fail + let result = validate_table_databases_and_clusters(&operations, "local", &[], &clusters); + assert!(result.is_err()); + let err = result.unwrap_err().to_string(); + assert!(err.contains("unconfigured_cluster")); + } } diff --git a/apps/framework-cli/src/cli/routines/mod.rs b/apps/framework-cli/src/cli/routines/mod.rs index 49492b11c8..c25017e6c8 100644 --- a/apps/framework-cli/src/cli/routines/mod.rs +++ b/apps/framework-cli/src/cli/routines/mod.rs @@ -98,7 +98,6 @@ use crate::framework::core::migration_plan::{MigrationPlan, MigrationPlanWithBef use crate::framework::core::plan_validator; use crate::infrastructure::redis::redis_client::RedisClient; use crate::project::Project; -use log::{debug, error, info, warn}; use serde::Deserialize; use std::collections::{HashMap, HashSet}; use std::path::PathBuf; @@ -106,6 +105,7 @@ use 
std::sync::atomic::{AtomicBool, Ordering}; use std::sync::Arc; use tokio::sync::RwLock; use tokio::time::{interval, Duration}; +use tracing::{debug, error, info, warn}; use super::super::metrics::Metrics; use super::local_webserver::{PlanRequest, PlanResponse, Webserver}; @@ -161,6 +161,7 @@ pub mod clean; pub mod code_generation; pub mod dev; pub mod docker_packager; +pub mod format_query; pub mod kafka_pull; pub mod logs; pub mod ls; @@ -169,6 +170,7 @@ pub mod migrate; pub mod openapi; pub mod peek; pub mod ps; +pub mod query; pub mod scripts; pub mod seed_data; pub mod templates; @@ -485,7 +487,9 @@ pub async fn start_development_mode( .to_string(), } ); - log::warn!("Failed to write suppression flag to config: {e:?}"); + tracing::warn!( + "Failed to write suppression flag to config: {e:?}" + ); } } None @@ -1018,7 +1022,17 @@ pub async fn remote_plan( .map(|t| t.id(&local_infra_map.default_database)) .collect(); - get_remote_inframap_serverless(project, clickhouse_url, None, table_names).await? + let sql_resource_ids: HashSet = + local_infra_map.sql_resources.keys().cloned().collect(); + + get_remote_inframap_serverless( + project, + clickhouse_url, + None, + &table_names, + &sql_resource_ids, + ) + .await? } else { // Moose server flow display::show_message_wrapper( @@ -1063,6 +1077,12 @@ pub async fn remote_plan( } }; + // Normalize both infra maps for backward compatibility + // This ensures consistent comparison between old and new CLI versions + // by applying the same normalization logic (e.g., filling order_by from primary key) + let remote_infra_map = remote_infra_map.normalize(); + let local_infra_map = local_infra_map.normalize(); + // Calculate and display changes let changes = calculate_plan_diff_local( &remote_infra_map, @@ -1157,16 +1177,32 @@ pub async fn remote_gen_migration( .to_string(), }, ); - let table_ids: HashSet = local_infra_map + + let target_table_ids: HashSet = local_infra_map .tables .values() .map(|t| t.id(&local_infra_map.default_database)) .collect(); - get_remote_inframap_serverless(project, clickhouse_url, redis_url.as_deref(), table_ids) - .await? + let target_sql_resource_ids: HashSet = + local_infra_map.sql_resources.keys().cloned().collect(); + + get_remote_inframap_serverless( + project, + clickhouse_url, + redis_url.as_deref(), + &target_table_ids, + &target_sql_resource_ids, + ) + .await? 
} }; + // Normalize both infra maps for backward compatibility + // This ensures consistent comparison between old and new CLI versions + // by applying the same normalization logic (e.g., filling order_by from primary key) + let remote_infra_map = remote_infra_map.normalize(); + let local_infra_map = local_infra_map.normalize(); + let changes = calculate_plan_diff_local( &remote_infra_map, &local_infra_map, @@ -1181,8 +1217,16 @@ pub async fn remote_gen_migration( }, ); + // Validate the plan before generating migration files + let plan = InfraPlan { + target_infra_map: local_infra_map.clone(), + changes, + }; + + plan_validator::validate(project, &plan)?; + let db_migration = - MigrationPlan::from_infra_plan(&changes, &project.clickhouse_config.db_name)?; + MigrationPlan::from_infra_plan(&plan.changes, &project.clickhouse_config.db_name)?; Ok(MigrationPlanWithBeforeAfter { remote_state: remote_infra_map, @@ -1198,7 +1242,8 @@ async fn get_remote_inframap_serverless( project: &Project, clickhouse_url: &str, redis_url: Option<&str>, - target_table_ids: HashSet, + target_table_ids: &HashSet, + target_sql_resource_ids: &HashSet, ) -> anyhow::Result { use crate::framework::core::plan::reconcile_with_reality; use crate::infrastructure::olap::clickhouse::config::parse_clickhouse_connection_string; @@ -1225,7 +1270,8 @@ async fn get_remote_inframap_serverless( reconcile_with_reality( project, &remote_infra_map, - &target_table_ids, + target_table_ids, + target_sql_resource_ids, reconcile_client, ) .await? diff --git a/apps/framework-cli/src/cli/routines/peek.rs b/apps/framework-cli/src/cli/routines/peek.rs index 7e8db9bdd1..8fc031e921 100644 --- a/apps/framework-cli/src/cli/routines/peek.rs +++ b/apps/framework-cli/src/cli/routines/peek.rs @@ -6,7 +6,7 @@ use crate::cli::display::Message; use crate::framework::core::infrastructure_map::InfrastructureMap; use crate::infrastructure::olap::clickhouse::mapper::std_table_to_clickhouse_table; -use crate::infrastructure::olap::clickhouse_alt_client::{get_pool, select_some_as_json}; +use crate::infrastructure::olap::clickhouse_http_client::create_query_client; use crate::project::Project; use super::{setup_redis_client, RoutineFailure, RoutineSuccess}; @@ -14,7 +14,6 @@ use super::{setup_redis_client, RoutineFailure, RoutineSuccess}; use crate::infrastructure::olap::clickhouse::model::ClickHouseTable; use crate::infrastructure::stream::kafka::client::create_consumer; use futures::stream::BoxStream; -use log::info; use rdkafka::consumer::{Consumer, StreamConsumer}; use rdkafka::{Message as KafkaMessage, Offset, TopicPartitionList}; use serde_json::Value; @@ -24,6 +23,7 @@ use std::time::Duration; use tokio::fs::File; use tokio::io::AsyncWriteExt; use tokio_stream::StreamExt; +use tracing::info; /// Retrieves and displays a sample of data from either a database table or streaming topic. 
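/// With the HTTP client path below, the table branch builds
/// `SELECT * FROM "<db>"."<table>" [ORDER BY ...] LIMIT <n>`, taking the ORDER BY
/// clause from the table definition, falling back to the primary key columns, and
/// omitting it when neither is defined, then streams the returned JSON rows back
/// to the caller.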
/// @@ -49,13 +49,8 @@ pub async fn peek( file: Option, is_stream: bool, ) -> Result { - let pool = get_pool(&project.clickhouse_config); - let mut client = pool.get_handle().await.map_err(|_| { - RoutineFailure::error(Message::new( - "Failed".to_string(), - "Error connecting to storage".to_string(), - )) - })?; + // Get HTTP-based ClickHouse client + let client = create_query_client(&project.clickhouse_config); let redis_client = setup_redis_client(project.clone()).await.map_err(|e| { RoutineFailure::error(Message { @@ -163,22 +158,64 @@ pub async fn peek( )) })?; - Box::pin( - select_some_as_json( - &project.clickhouse_config.db_name, - &table_ref, - &mut client, - limit as i64, - ) - .await - .map_err(|_| { - RoutineFailure::error(Message::new( - "Failed".to_string(), - "Error selecting data".to_string(), - )) - })? - .map(|result| anyhow::Ok(result?)), + // Build the SELECT query + let order_by = match &table_ref.order_by { + crate::framework::core::infrastructure::table::OrderBy::Fields(fields) + if !fields.is_empty() => + { + format!( + "ORDER BY {}", + crate::infrastructure::olap::clickhouse::model::wrap_and_join_column_names( + fields, ", " + ) + ) + } + crate::framework::core::infrastructure::table::OrderBy::SingleExpr(expr) => { + format!("ORDER BY {expr}") + } + _ => { + // Fall back to primary key + let key_columns: Vec = table_ref + .primary_key_columns() + .iter() + .map(|s| s.to_string()) + .collect(); + + if key_columns.is_empty() { + "".to_string() + } else { + format!( + "ORDER BY {}", + crate::infrastructure::olap::clickhouse::model::wrap_and_join_column_names( + &key_columns, + ", " + ) + ) + } + } + }; + + let query = format!( + "SELECT * FROM \"{}\".\"{}\" {} LIMIT {}", + project.clickhouse_config.db_name, table_ref.name, order_by, limit + ); + + info!("Peek query: {}", query); + + // Execute query + let rows = crate::infrastructure::olap::clickhouse_http_client::query_as_json_stream( + &client, &query, ) + .await + .map_err(|e| { + RoutineFailure::error(Message::new( + "Peek".to_string(), + format!("ClickHouse query error: {}", e), + )) + })?; + + // Convert Vec to stream + Box::pin(tokio_stream::iter(rows.into_iter().map(anyhow::Ok))) }; let mut success_count = 0; @@ -230,7 +267,7 @@ pub async fn peek( success_count += 1; } Err(e) => { - log::error!("Failed to read row {}", e); + tracing::error!("Failed to read row {}", e); } } } diff --git a/apps/framework-cli/src/cli/routines/ps.rs b/apps/framework-cli/src/cli/routines/ps.rs index f584c33adf..512f8c4ddb 100644 --- a/apps/framework-cli/src/cli/routines/ps.rs +++ b/apps/framework-cli/src/cli/routines/ps.rs @@ -3,7 +3,7 @@ use std::{ sync::Arc, }; -use log::error; +use tracing::error; use crate::{ cli::display::{show_table, Message}, diff --git a/apps/framework-cli/src/cli/routines/query.rs b/apps/framework-cli/src/cli/routines/query.rs new file mode 100644 index 0000000000..fbbc9ddd57 --- /dev/null +++ b/apps/framework-cli/src/cli/routines/query.rs @@ -0,0 +1,182 @@ +//! Module for executing arbitrary SQL queries against ClickHouse. +//! +//! This module provides functionality to execute raw SQL queries and return +//! results as JSON for debugging and exploration purposes. 
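+//!
+//! A minimal usage sketch, assuming the `format_query` helpers defined alongside this
+//! module (`validate_sql`, `format_as_code`, `CodeLanguage`); illustrative only, since
+//! crate-internal paths keep it from running as a doctest:
+//!
+//! ```ignore
+//! use crate::cli::routines::format_query::{format_as_code, validate_sql, CodeLanguage};
+//!
+//! let sql = "SELECT 1";
+//! // Syntax is checked with the ClickHouse dialect before anything is executed or formatted.
+//! assert!(validate_sql(sql).is_ok());
+//! // When a format language is supplied, the query is printed as a code literal instead
+//! // of being executed; the unit tests assert this exact shape for Python.
+//! if let Ok(literal) = format_as_code(sql, CodeLanguage::Python, false) {
+//!     assert_eq!(literal, "r\"\"\"\nSELECT 1\n\"\"\"");
+//! }
+//! ```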
+ +use crate::cli::display::Message; +use crate::cli::routines::{setup_redis_client, RoutineFailure, RoutineSuccess}; +use crate::framework::core::infrastructure_map::InfrastructureMap; +use crate::infrastructure::olap::clickhouse_http_client::create_query_client; +use crate::project::Project; + +use std::io::Read; +use std::path::PathBuf; +use std::sync::Arc; +use tracing::info; + +/// Reads SQL query from argument, file, or stdin. +/// +/// # Arguments +/// +/// * `sql` - Optional SQL query string from command line +/// * `file` - Optional file path containing SQL query +/// +/// # Returns +/// +/// * `Result` - SQL query string or error +fn get_sql_input(sql: Option, file: Option) -> Result { + if let Some(query_str) = sql { + // SQL provided as argument + Ok(query_str) + } else if let Some(file_path) = file { + // Read SQL from file + std::fs::read_to_string(&file_path).map_err(|e| { + RoutineFailure::new( + Message::new( + "Query".to_string(), + format!("Failed to read file: {}", file_path.display()), + ), + e, + ) + }) + } else { + // Read SQL from stdin + let mut buffer = String::new(); + std::io::stdin().read_to_string(&mut buffer).map_err(|e| { + RoutineFailure::new( + Message::new("Query".to_string(), "Failed to read from stdin".to_string()), + e, + ) + })?; + + if buffer.trim().is_empty() { + return Err(RoutineFailure::error(Message::new( + "Query".to_string(), + "No SQL query provided (use argument, --file, or stdin)".to_string(), + ))); + } + + Ok(buffer) + } +} + +/// Executes a SQL query against ClickHouse and displays results as JSON. +/// +/// Allows users to run arbitrary SQL queries against the ClickHouse database +/// for exploration and debugging. Results are streamed as JSON to stdout. +/// +/// # Arguments +/// +/// * `project` - The project configuration to use +/// * `sql` - Optional SQL query string +/// * `file` - Optional file path containing SQL query +/// * `limit` - Maximum number of rows to return (via ClickHouse settings) +/// * `format_query` - Optional language name to format query as code literal instead of executing +/// * `prettify` - Whether to prettify SQL before formatting +/// +/// # Returns +/// +/// * `Result` - Success or failure of the operation +pub async fn query( + project: Arc, + sql: Option, + file: Option, + limit: u64, + format_query: Option, + prettify: bool, +) -> Result { + let sql_query = get_sql_input(sql, file)?; + + // Validate SQL syntax before any operation + use crate::cli::routines::format_query::validate_sql; + validate_sql(&sql_query)?; + + // If format_query flag is present, format and exit without executing + if let Some(lang_str) = format_query { + use crate::cli::routines::format_query::{format_as_code, CodeLanguage}; + + let language = CodeLanguage::from_str(&lang_str)?; + let formatted = format_as_code(&sql_query, language, prettify)?; + + println!("{}", formatted); + + return Ok(RoutineSuccess::success(Message::new( + "Format Query".to_string(), + format!( + "Formatted as {} code{}", + lang_str, + if prettify { " (prettified)" } else { "" } + ), + ))); + } + + info!("Executing SQL: {}", sql_query); + + // Get HTTP-based ClickHouse client + let client = create_query_client(&project.clickhouse_config); + + let redis_client = setup_redis_client(project.clone()).await.map_err(|e| { + RoutineFailure::error(Message { + action: "Query".to_string(), + details: format!("Failed to setup redis client: {e:?}"), + }) + })?; + + // Validate that infrastructure state exists and is accessible. 
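+    // The loaded map is deliberately discarded (hence `_infra`): this lookup only
+    // confirms that `moose dev` has published state to Redis, so missing or
+    // unretrievable state surfaces as a clear error before the query is sent to
+    // ClickHouse.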
+ let _infra = InfrastructureMap::load_from_redis(&redis_client) + .await + .map_err(|_| { + RoutineFailure::error(Message::new( + "Failed".to_string(), + "Error retrieving current state".to_string(), + )) + })? + .ok_or_else(|| { + RoutineFailure::error(Message::new( + "Failed".to_string(), + "No infrastructure state found. Is 'moose dev' running?".to_string(), + )) + })?; + + // Execute query and get results + let rows = crate::infrastructure::olap::clickhouse_http_client::query_as_json_stream( + &client, &sql_query, + ) + .await + .map_err(|e| { + RoutineFailure::error(Message::new( + "Query".to_string(), + format!("ClickHouse query error: {}", e), + )) + })?; + + // Stream results to stdout + let success_count = rows.len().min(limit as usize); + for (idx, row) in rows.iter().enumerate() { + if idx >= limit as usize { + info!("Reached limit of {} rows", limit); + break; + } + + let json = serde_json::to_string(row).map_err(|e| { + RoutineFailure::new( + Message::new( + "Query".to_string(), + "Failed to serialize result".to_string(), + ), + e, + ) + })?; + + println!("{}", json); + info!("{}", json); + } + + // Add newline for output cleanliness + println!(); + + Ok(RoutineSuccess::success(Message::new( + "Query".to_string(), + format!("{} rows", success_count), + ))) +} diff --git a/apps/framework-cli/src/cli/routines/seed_data.rs b/apps/framework-cli/src/cli/routines/seed_data.rs index 20124a23d6..75a940889f 100644 --- a/apps/framework-cli/src/cli/routines/seed_data.rs +++ b/apps/framework-cli/src/cli/routines/seed_data.rs @@ -13,16 +13,17 @@ use crate::project::Project; use crate::utilities::constants::KEY_REMOTE_CLICKHOUSE_URL; use crate::utilities::keyring::{KeyringSecretRepository, SecretRepository}; -use log::{debug, info, warn}; +use crate::framework::core::infrastructure::table::Table; use std::cmp::min; use std::collections::HashSet; +use tracing::{debug, info, warn}; /// Validates that a database name is not empty fn validate_database_name(db_name: &str) -> Result<(), RoutineFailure> { if db_name.is_empty() { Err(RoutineFailure::error(Message::new( "SeedClickhouse".to_string(), - "No database specified in connection string and unable to determine current database" + "No database specified in ClickHouse URL and unable to determine current database" .to_string(), ))) } else { @@ -36,26 +37,54 @@ fn build_remote_tables_query( remote_user: &str, remote_password: &str, remote_db: &str, + other_dbs: &[&str], ) -> String { + let mut databases = vec![remote_db]; + databases.extend(other_dbs); + + let db_list = databases + .iter() + .map(|db| format!("'{}'", db)) + .collect::>() + .join(", "); + format!( - "SELECT name FROM remoteSecure('{}', 'system', 'tables', '{}', '{}') WHERE database = '{}'", - remote_host_and_port, remote_user, remote_password, remote_db + "SELECT database, name FROM remoteSecure('{}', 'system', 'tables', '{}', '{}') WHERE database IN ({})", + remote_host_and_port, remote_user, remote_password, db_list ) } -/// Parses the response from remote tables query into a HashSet -fn parse_remote_tables_response(response: &str) -> HashSet { +/// Parses the response from remote tables query into a HashSet of (database, table) tuples +fn parse_remote_tables_response(response: &str) -> HashSet<(String, String)> { response .lines() - .map(|line| line.trim().to_string()) - .filter(|table| !table.is_empty()) + .filter_map(|line| { + let line = line.trim(); + if line.is_empty() { + return None; + } + // Split by tab or whitespace to get database and table + let parts: Vec<&str> = 
line.split('\t').collect(); + if parts.len() >= 2 { + Some((parts[0].trim().to_string(), parts[1].trim().to_string())) + } else { + None + } + }) .collect() } /// Determines if a table should be skipped during seeding -fn should_skip_table(table_name: &str, remote_tables: &Option>) -> bool { +/// db being None means "use the remote default" +fn should_skip_table( + db: &Option, + table_name: &str, + remote_db: &str, + remote_tables: &Option>, +) -> bool { if let Some(ref remote_table_set) = remote_tables { - !remote_table_set.contains(table_name) + let db_to_check = db.as_deref().unwrap_or(remote_db); + !remote_table_set.contains(&(db_to_check.to_string(), table_name.to_string())) } else { false } @@ -128,20 +157,13 @@ async fn load_infrastructure_map(project: &Project) -> Result, total_rows: usize, batch_size: usize, ) -> Result { match order_by { None => { - let table = infra_map.tables.get(table_name).ok_or_else(|| { - RoutineFailure::error(Message::new( - "Seed".to_string(), - format!("{table_name} not found."), - )) - })?; let clause = match &table.order_by { crate::framework::core::infrastructure::table::OrderBy::Fields(v) => v .iter() @@ -159,7 +181,7 @@ fn build_order_by_clause( } else { Err(RoutineFailure::error(Message::new( "Seed".to_string(), - format!("Table {table_name} without ORDER BY. Supply ordering with --order-by to prevent the same row fetched in multiple batches."), + format!("Table {} without ORDER BY. Supply ordering with --order-by to prevent the same row fetched in multiple batches.", table.name), ))) } } @@ -215,23 +237,23 @@ async fn get_remote_table_count( /// Seeds a single table with batched copying async fn seed_single_table( - infra_map: &InfrastructureMap, local_clickhouse: &ClickHouseClient, remote_config: &ClickHouseConfig, - table_name: &str, + table: &Table, limit: Option, order_by: Option<&str>, ) -> Result { let remote_host_and_port = format!("{}:{}", remote_config.host, remote_config.native_port); - let local_db = &local_clickhouse.config().db_name; + let db = table.database.as_deref(); + let local_db = db.unwrap_or(&local_clickhouse.config().db_name); let batch_size: usize = 50_000; // Get total row count let remote_total = get_remote_table_count( local_clickhouse, &remote_host_and_port, - &remote_config.db_name, - table_name, + db.unwrap_or(&remote_config.db_name), + &table.name, &remote_config.user, &remote_config.password, ) @@ -243,7 +265,7 @@ async fn seed_single_table( } else { RoutineFailure::error(Message::new( "SeedSingleTable".to_string(), - format!("Failed to get row count for {table_name}: {e:?}"), + format!("Failed to get row count for {}: {e:?}", table.name), )) } })?; @@ -253,8 +275,7 @@ async fn seed_single_table( Some(l) => min(remote_total, l), }; - let order_by_clause = - build_order_by_clause(table_name, infra_map, order_by, total_rows, batch_size)?; + let order_by_clause = build_order_by_clause(table, order_by, total_rows, batch_size)?; let mut copied_total: usize = 0; let mut i: usize = 0; @@ -268,9 +289,9 @@ async fn seed_single_table( let sql = build_seeding_query(&SeedingQueryParams { local_db, - table_name, + table_name: &table.name, remote_host_and_port: &remote_host_and_port, - remote_db: &remote_config.db_name, + remote_db: db.unwrap_or(&remote_config.db_name), remote_user: &remote_config.user, remote_password: &remote_config.password, order_by_clause: &order_by_clause, @@ -278,50 +299,51 @@ async fn seed_single_table( offset: copied_total, }); - debug!("Executing SQL: table={table_name}, offset={copied_total}, 
limit={batch_limit}"); + debug!( + "Executing SQL: table={}, offset={copied_total}, limit={batch_limit}", + table.name + ); match local_clickhouse.execute_sql(&sql).await { Ok(_) => { copied_total += batch_limit; - debug!("{table_name}: copied batch {i}"); + debug!("{}: copied batch {i}", table.name); } Err(e) => { return Err(RoutineFailure::error(Message::new( "SeedSingleTable".to_string(), - format!("Failed to copy batch for {table_name}: {e}"), + format!("Failed to copy batch for {}: {e}", table.name), ))); } } } - Ok(format!("✓ {table_name}: copied from remote")) + Ok(format!("✓ {}: copied from remote", table.name)) } /// Gets the list of tables to seed based on parameters -fn get_tables_to_seed(infra_map: &InfrastructureMap, table_name: Option) -> Vec { - if let Some(ref t) = table_name { - info!("Seeding single table: {}", t); - vec![t.clone()] - } else { - let table_list: Vec = infra_map - .tables - .keys() - .filter(|table| !table.starts_with("_MOOSE")) - .cloned() - .collect(); - info!( - "Seeding {} tables (excluding internal Moose tables)", - table_list.len() - ); - table_list - } +fn get_tables_to_seed(infra_map: &InfrastructureMap, table_name: Option) -> Vec<&Table> { + let table_list: Vec<_> = infra_map + .tables + .values() + .filter(|table| match &table_name { + None => !table.name.starts_with("_MOOSE"), + Some(name) => &table.name == name, + }) + .collect(); + info!( + "Seeding {} tables (excluding internal Moose tables)", + table_list.len() + ); + + table_list } /// Performs the complete ClickHouse seeding operation including infrastructure loading, /// table validation, and data copying async fn seed_clickhouse_operation( project: &Project, - connection_string: &str, + clickhouse_url: &str, table: Option, limit: Option, order_by: Option<&str>, @@ -329,11 +351,11 @@ async fn seed_clickhouse_operation( // Load infrastructure map let infra_map = load_infrastructure_map(project).await?; - // Parse connection string - let remote_config = parse_clickhouse_connection_string(connection_string).map_err(|e| { + // Parse ClickHouse URL + let remote_config = parse_clickhouse_connection_string(clickhouse_url).map_err(|e| { RoutineFailure::error(Message::new( "SeedClickhouse".to_string(), - format!("Invalid connection string: {e}"), + format!("Invalid ClickHouse URL: {e}"), )) })?; @@ -371,10 +393,12 @@ async fn seed_clickhouse_operation( } /// Get list of available tables from remote ClickHouse database +/// Returns a set of (database, table_name) tuples async fn get_remote_tables( local_clickhouse: &ClickHouseClient, remote_config: &ClickHouseConfig, -) -> Result, RoutineFailure> { + other_dbs: &[&str], +) -> Result, RoutineFailure> { let remote_host_and_port = format!("{}:{}", remote_config.host, remote_config.native_port); let sql = build_remote_tables_query( @@ -382,6 +406,7 @@ async fn get_remote_tables( &remote_config.user, &remote_config.password, &remote_config.db_name, + other_dbs, ); debug!("Querying remote tables: {}", sql); @@ -407,13 +432,13 @@ pub async fn handle_seed_command( ) -> Result { match &seed_args.command { Some(SeedSubcommands::Clickhouse { - connection_string, + clickhouse_url, limit, all, table, order_by, }) => { - let resolved_connection_string = match connection_string { + let resolved_clickhouse_url = match clickhouse_url { Some(s) => s.clone(), None => { let repo = KeyringSecretRepository; @@ -422,27 +447,27 @@ pub async fn handle_seed_command( Ok(None) => { return Err(RoutineFailure::error(Message::new( "SeedClickhouse".to_string(), - "No connection 
string provided and none saved. Pass --connection-string or save one via `moose init --from-remote`.".to_string(), + "No ClickHouse URL provided and none saved. Pass --clickhouse-url or save one via `moose init --from-remote`.".to_string(), ))) } Err(e) => { return Err(RoutineFailure::error(Message::new( "SeedClickhouse".to_string(), - format!("Failed to read saved connection string from keychain: {e:?}"), + format!("Failed to read saved ClickHouse URL from keychain: {e:?}"), ))) } } } }; - info!("Running seed clickhouse command with connection string: {resolved_connection_string}"); + info!("Running seed clickhouse command with ClickHouse URL: {resolved_clickhouse_url}"); let (local_db_name, remote_db_name, summary) = with_spinner_completion_async( "Initializing database seeding operation...", "Database seeding completed", seed_clickhouse_operation( project, - &resolved_connection_string, + &resolved_clickhouse_url, table.clone(), if *all { None } else { Some(*limit) }, order_by.as_deref(), @@ -481,13 +506,23 @@ pub async fn seed_clickhouse_tables( // Get the list of tables to seed let tables = get_tables_to_seed(infra_map, table_name.clone()); + let other_dbs: Vec<&str> = tables + .iter() + .filter_map(|t| t.database.as_deref()) + .collect(); // Get available remote tables for validation (unless specific table is requested) - let remote_tables = if table_name.is_some() { + let remote_tables = if let Some(name) = table_name { + if tables.is_empty() { + return Err(RoutineFailure::error(Message::new( + "Table".to_string(), + format!("{name} not found."), + ))); + } // Skip validation if user specified a specific table None } else { - match get_remote_tables(local_clickhouse, remote_config).await { + match get_remote_tables(local_clickhouse, remote_config, &other_dbs).await { Ok(tables) => Some(tables), Err(e) => { warn!("Failed to query remote tables for validation: {:?}", e); @@ -504,28 +539,24 @@ pub async fn seed_clickhouse_tables( }; // Process each table - for table_name in tables { + for table in tables { // Check if table should be skipped due to validation - if should_skip_table(&table_name, &remote_tables) { - debug!( + if should_skip_table( + &table.database, + &table.name, + &remote_config.db_name, + &remote_tables, + ) { + info!( "Table '{}' exists locally but not on remote - skipping", - table_name + table.name ); - summary.push(format!("⚠️ {}: skipped (not found on remote)", table_name)); + summary.push(format!("⚠️ {}: skipped (not found on remote)", table.name)); continue; } // Attempt to seed the single table - match seed_single_table( - infra_map, - local_clickhouse, - remote_config, - &table_name, - limit, - order_by, - ) - .await - { + match seed_single_table(local_clickhouse, remote_config, table, limit, order_by).await { Ok(success_msg) => { summary.push(success_msg); } @@ -534,14 +565,14 @@ pub async fn seed_clickhouse_tables( // Table not found on remote, skip gracefully debug!( "Table '{}' not found on remote database - skipping", - table_name + table.name ); - summary.push(format!("⚠️ {}: skipped (not found on remote)", table_name)); + summary.push(format!("⚠️ {}: skipped (not found on remote)", table.name)); } else { // Other errors should be added as failures summary.push(format!( "✗ {}: failed to copy - {}", - table_name, e.message.details + table.name, e.message.details )); } } @@ -555,6 +586,57 @@ pub async fn seed_clickhouse_tables( #[cfg(test)] mod tests { use super::*; + use crate::framework::core::infrastructure::table::OrderBy; + use 
crate::framework::core::infrastructure_map::{PrimitiveSignature, PrimitiveTypes}; + use crate::framework::core::partial_infrastructure_map::LifeCycle; + use crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine; + use std::collections::HashMap; + + /// Helper function to create a minimal test Table + fn create_test_table(name: &str, database: Option) -> Table { + Table { + name: name.to_string(), + columns: vec![], + order_by: OrderBy::Fields(vec!["id".to_string()]), + partition_by: None, + sample_by: None, + engine: ClickhouseEngine::MergeTree, + version: None, + source_primitive: PrimitiveSignature { + name: "test".to_string(), + primitive_type: PrimitiveTypes::DataModel, + }, + metadata: None, + life_cycle: LifeCycle::default_for_deserialization(), + indexes: vec![], + database, + engine_params_hash: None, + table_settings: None, + table_ttl_setting: None, + cluster_name: None, + primary_key_expression: None, + } + } + + /// Helper function to create a minimal test InfrastructureMap + fn create_test_infra_map(tables: HashMap) -> InfrastructureMap { + InfrastructureMap { + default_database: "default".to_string(), + topics: HashMap::new(), + api_endpoints: HashMap::new(), + tables, + views: HashMap::new(), + topic_to_table_sync_processes: HashMap::new(), + topic_to_topic_sync_processes: HashMap::new(), + function_processes: HashMap::new(), + block_db_processes: crate::framework::core::infrastructure::olap_process::OlapProcess {}, + consumption_api_web_server: crate::framework::core::infrastructure::consumption_webserver::ConsumptionApiWebServer {}, + orchestration_workers: HashMap::new(), + sql_resources: HashMap::new(), + workflows: HashMap::new(), + web_apps: HashMap::new(), + } + } #[test] fn test_validate_database_name_valid() { @@ -574,19 +656,32 @@ mod tests { #[test] fn test_build_remote_tables_query() { - let query = build_remote_tables_query("host:9440", "user", "pass", "mydb"); - let expected = "SELECT name FROM remoteSecure('host:9440', 'system', 'tables', 'user', 'pass') WHERE database = 'mydb'"; + let query = build_remote_tables_query("host:9440", "user", "pass", "mydb", &[]); + let expected = "SELECT database, name FROM remoteSecure('host:9440', 'system', 'tables', 'user', 'pass') WHERE database IN ('mydb')"; + assert_eq!(query, expected); + } + + #[test] + fn test_build_remote_tables_query_with_other_dbs() { + let query = build_remote_tables_query( + "host:9440", + "user", + "pass", + "mydb", + &["otherdb1", "otherdb2"], + ); + let expected = "SELECT database, name FROM remoteSecure('host:9440', 'system', 'tables', 'user', 'pass') WHERE database IN ('mydb', 'otherdb1', 'otherdb2')"; assert_eq!(query, expected); } #[test] fn test_parse_remote_tables_response_valid() { - let response = "table1\ntable2\n table3 \n\n"; + let response = "db1\ttable1\ndb1\ttable2\ndb2\ttable3\n\n"; let result = parse_remote_tables_response(response); assert_eq!(result.len(), 3); - assert!(result.contains("table1")); - assert!(result.contains("table2")); - assert!(result.contains("table3")); + assert!(result.contains(&("db1".to_string(), "table1".to_string()))); + assert!(result.contains(&("db1".to_string(), "table2".to_string()))); + assert!(result.contains(&("db2".to_string(), "table3".to_string()))); } #[test] @@ -599,16 +694,64 @@ mod tests { #[test] fn test_should_skip_table_when_not_in_remote() { let mut remote_tables = HashSet::new(); - remote_tables.insert("table1".to_string()); - remote_tables.insert("table2".to_string()); - - assert!(!should_skip_table("table1", 
&Some(remote_tables.clone()))); - assert!(should_skip_table("table3", &Some(remote_tables))); + remote_tables.insert(("mydb".to_string(), "table1".to_string())); + remote_tables.insert(("mydb".to_string(), "table2".to_string())); + + // Table exists in remote (using default db) + assert!(!should_skip_table( + &None, + "table1", + "mydb", + &Some(remote_tables.clone()) + )); + // Table exists in remote (with explicit db) + assert!(!should_skip_table( + &Some("mydb".to_string()), + "table1", + "mydb", + &Some(remote_tables.clone()) + )); + // Table doesn't exist in remote + assert!(should_skip_table( + &None, + "table3", + "mydb", + &Some(remote_tables) + )); } #[test] fn test_should_skip_table_when_no_validation() { - assert!(!should_skip_table("any_table", &None)); + assert!(!should_skip_table(&None, "any_table", "mydb", &None)); + } + + #[test] + fn test_should_skip_table_with_other_db() { + let mut remote_tables = HashSet::new(); + remote_tables.insert(("mydb".to_string(), "table1".to_string())); + remote_tables.insert(("otherdb".to_string(), "table2".to_string())); + + // Table exists in default db + assert!(!should_skip_table( + &None, + "table1", + "mydb", + &Some(remote_tables.clone()) + )); + // Table exists in other db + assert!(!should_skip_table( + &Some("otherdb".to_string()), + "table2", + "mydb", + &Some(remote_tables.clone()) + )); + // Table doesn't exist in specified db (even though it exists in default db) + assert!(should_skip_table( + &Some("otherdb".to_string()), + "table1", + "mydb", + &Some(remote_tables) + )); } #[test] @@ -638,34 +781,42 @@ mod tests { #[test] fn test_build_order_by_clause_with_provided_order() { - let infra_map = InfrastructureMap::default(); + let table = create_test_table("my_table", None); - let result = build_order_by_clause("my_table", &infra_map, Some("id ASC"), 1000, 500); + let result = build_order_by_clause(&table, Some("id ASC"), 1000, 500); assert!(result.is_ok()); assert_eq!(result.unwrap(), "ORDER BY id ASC"); } #[test] - fn test_build_order_by_clause_table_not_found() { - let infra_map = InfrastructureMap::default(); + fn test_build_order_by_clause_without_order_by_and_no_provided_order() { + let mut table = create_test_table("my_table", None); + table.order_by = OrderBy::Fields(vec![]); // No ORDER BY fields - let result = build_order_by_clause("nonexistent_table", &infra_map, None, 1000, 500); + let result = build_order_by_clause(&table, None, 1000, 500); assert!(result.is_err()); if let Err(e) = result { assert_eq!(e.message.action, "Seed"); - assert!(e.message.details.contains("not found")); + assert!(e.message.details.contains("without ORDER BY")); } } #[test] fn test_get_tables_to_seed_single_table() { - let infra_map = InfrastructureMap::default(); + let mut tables = HashMap::new(); + tables.insert( + "specific_table".to_string(), + create_test_table("specific_table", None), + ); + + let infra_map = create_test_infra_map(tables); let result = get_tables_to_seed(&infra_map, Some("specific_table".to_string())); assert_eq!(result.len(), 1); - assert_eq!(result[0], "specific_table"); + assert_eq!(result[0].name, "specific_table"); + assert_eq!(result[0].database, None); } #[test] diff --git a/apps/framework-cli/src/cli/routines/templates.rs b/apps/framework-cli/src/cli/routines/templates.rs index b6e7b80609..cfc79035b3 100644 --- a/apps/framework-cli/src/cli/routines/templates.rs +++ b/apps/framework-cli/src/cli/routines/templates.rs @@ -1,7 +1,6 @@ use flate2::read::GzDecoder; use futures::StreamExt; use home::home_dir; -use 
log::warn; use regex::Regex; use std::fs::File; use std::io::Write; @@ -10,6 +9,7 @@ use std::path::PathBuf; use std::sync::Arc; use tar::Archive; use toml::Value; +use tracing::warn; use super::RoutineFailure; use super::RoutineSuccess; diff --git a/apps/framework-cli/src/cli/routines/truncate_table.rs b/apps/framework-cli/src/cli/routines/truncate_table.rs index 3c35e1f416..29abe3f70b 100644 --- a/apps/framework-cli/src/cli/routines/truncate_table.rs +++ b/apps/framework-cli/src/cli/routines/truncate_table.rs @@ -4,7 +4,7 @@ use crate::infrastructure::olap::clickhouse::{ check_ready, create_client, extract_order_by_from_create_query, run_query, }; use crate::project::Project; -use log::{info, warn}; +use tracing::{info, warn}; fn escape_ident(ident: &str) -> String { ident.replace('`', "``") diff --git a/apps/framework-cli/src/cli/routines/validate.rs b/apps/framework-cli/src/cli/routines/validate.rs index fd433ac784..a3266ce8bb 100644 --- a/apps/framework-cli/src/cli/routines/validate.rs +++ b/apps/framework-cli/src/cli/routines/validate.rs @@ -1,4 +1,4 @@ -use log::debug; +use tracing::debug; use super::{RoutineFailure, RoutineSuccess}; use crate::cli::display::Message; diff --git a/apps/framework-cli/src/cli/settings.rs b/apps/framework-cli/src/cli/settings.rs index 3db16de5c3..e4a5773fc1 100644 --- a/apps/framework-cli/src/cli/settings.rs +++ b/apps/framework-cli/src/cli/settings.rs @@ -31,10 +31,10 @@ use config::{Config, ConfigError, Environment, File}; use home::home_dir; -use log::warn; use serde::Deserialize; use std::path::PathBuf; use toml_edit::{table, value, DocumentMut, Entry, Item}; +use tracing::warn; use super::display::{Message, MessageType}; use super::logger::LoggerSettings; diff --git a/apps/framework-cli/src/cli/watcher.rs b/apps/framework-cli/src/cli/watcher.rs index 398317557e..4c899f3555 100644 --- a/apps/framework-cli/src/cli/watcher.rs +++ b/apps/framework-cli/src/cli/watcher.rs @@ -18,7 +18,6 @@ /// 3. After a short delay (debouncing), changes are processed to update the infrastructure /// 4. 
The updated infrastructure is applied to the system use crate::framework; -use log::info; use notify::event::ModifyKind; use notify::{Event, EventHandler, EventKind, RecommendedWatcher, RecursiveMode, Watcher}; use std::collections::HashSet; @@ -26,6 +25,7 @@ use std::sync::Arc; use std::time::Duration; use std::{io::Error, path::PathBuf}; use tokio::sync::RwLock; +use tracing::info; use crate::framework::core::infrastructure_map::{ApiChange, InfrastructureMap}; @@ -48,7 +48,7 @@ struct EventListener { impl EventHandler for EventListener { fn handle_event(&mut self, event: notify::Result) { - log::debug!("Received Watcher event: {:?}", event); + tracing::debug!("Received Watcher event: {:?}", event); match event { Ok(event) => { self.tx.send_if_modified(|events| { @@ -57,7 +57,7 @@ impl EventHandler for EventListener { }); } Err(e) => { - log::error!("Watcher Error: {:?}", e); + tracing::error!("Watcher Error: {:?}", e); } } } @@ -133,7 +133,7 @@ async fn watch( processing_coordinator: ProcessingCoordinator, mut shutdown_rx: tokio::sync::watch::Receiver, ) -> Result<(), anyhow::Error> { - log::debug!( + tracing::debug!( "Starting file watcher for project: {:?}", project.app_dir().display() ); @@ -148,7 +148,7 @@ async fn watch( .watch(project.app_dir().as_ref(), RecursiveMode::Recursive) .map_err(|e| Error::other(format!("Failed to watch file: {e}")))?; - log::debug!("Watcher setup complete, entering main loop"); + tracing::debug!("Watcher setup complete, entering main loop"); loop { tokio::select! { @@ -157,7 +157,7 @@ async fn watch( return Ok(()); } Ok(()) = rx.changed() => { - log::debug!("Received change notification, current changes: {:?}", rx.borrow()); + tracing::debug!("Received change notification, current changes: {:?}", rx.borrow()); } _ = tokio::time::sleep(Duration::from_secs(1)) => { let should_process = { @@ -166,7 +166,7 @@ async fn watch( }; if should_process { - log::debug!("Debounce period elapsed, processing changes"); + tracing::debug!("Debounce period elapsed, processing changes"); receiver_ack.send_replace(EventBuckets::default()); rx.mark_unchanged(); diff --git a/apps/framework-cli/src/framework/consumption/loader.rs b/apps/framework-cli/src/framework/consumption/loader.rs index ac4fc6adf1..eeac713a83 100644 --- a/apps/framework-cli/src/framework/consumption/loader.rs +++ b/apps/framework-cli/src/framework/consumption/loader.rs @@ -35,7 +35,7 @@ pub async fn load_consumption(project: &Project) -> Result log::info!("Terminated running workflows: {:?}", success), - Err(e) => log::warn!("Failed to terminate running workflows: {:?}", e), + Ok(success) => tracing::info!("Terminated running workflows: {:?}", success), + Err(e) => tracing::warn!("Failed to terminate running workflows: {:?}", e), } execute_scheduled_workflows(project, &plan.target_infra_map.workflows).await; diff --git a/apps/framework-cli/src/framework/core/infra_reality_checker.rs b/apps/framework-cli/src/framework/core/infra_reality_checker.rs index bf665073fb..08adf9cca3 100644 --- a/apps/framework-cli/src/framework/core/infra_reality_checker.rs +++ b/apps/framework-cli/src/framework/core/infra_reality_checker.rs @@ -16,16 +16,17 @@ /// - Identifying structural differences in tables use crate::{ framework::core::{ + infrastructure::sql_resource::SqlResource, infrastructure::table::Table, - infrastructure_map::{InfrastructureMap, OlapChange, TableChange}, + infrastructure_map::{Change, InfrastructureMap, OlapChange, TableChange}, }, infrastructure::olap::{OlapChangesError, OlapOperations}, 
project::Project, }; -use log::debug; use serde::{Deserialize, Serialize}; use std::collections::HashMap; use thiserror::Error; +use tracing::debug; /// Represents errors that can occur during infrastructure reality checking. #[derive(Debug, Error)] @@ -56,6 +57,12 @@ pub struct InfraDiscrepancies { pub missing_tables: Vec, /// Tables that exist in both but have structural differences pub mismatched_tables: Vec, + /// SQL resources (views/MVs) that exist in reality but are not in the map + pub unmapped_sql_resources: Vec, + /// SQL resources that are in the map but don't exist in reality + pub missing_sql_resources: Vec, + /// SQL resources that exist in both but have differences + pub mismatched_sql_resources: Vec, } impl InfraDiscrepancies { @@ -64,6 +71,9 @@ impl InfraDiscrepancies { self.unmapped_tables.is_empty() && self.missing_tables.is_empty() && self.mismatched_tables.is_empty() + && self.unmapped_sql_resources.is_empty() + && self.missing_sql_resources.is_empty() + && self.mismatched_sql_resources.is_empty() } } @@ -292,17 +302,107 @@ impl InfraRealityChecker { } } + // Fetch and compare SQL resources (views and materialized views) + debug!("Fetching actual SQL resources from OLAP databases"); + + let mut actual_sql_resources = Vec::new(); + + // Query each database and merge results + for database in &all_databases { + debug!("Fetching SQL resources from database: {}", database); + let mut db_sql_resources = self + .olap_client + .list_sql_resources(database, &infra_map.default_database) + .await?; + actual_sql_resources.append(&mut db_sql_resources); + } + + debug!( + "Found {} SQL resources across all databases", + actual_sql_resources.len() + ); + + // Create a map of actual SQL resources by name + let actual_sql_resource_map: HashMap = actual_sql_resources + .into_iter() + .map(|r| (r.name.clone(), r)) + .collect(); + + debug!( + "Actual SQL resource IDs: {:?}", + actual_sql_resource_map.keys() + ); + debug!( + "Infrastructure map SQL resource IDs: {:?}", + infra_map.sql_resources.keys() + ); + + // Find unmapped SQL resources (exist in reality but not in map) + let unmapped_sql_resources: Vec<_> = actual_sql_resource_map + .values() + .filter(|resource| !infra_map.sql_resources.contains_key(&resource.name)) + .cloned() + .collect(); + + debug!( + "Found {} unmapped SQL resources: {:?}", + unmapped_sql_resources.len(), + unmapped_sql_resources + .iter() + .map(|r| &r.name) + .collect::>() + ); + + // Find missing SQL resources (in map but don't exist in reality) + let missing_sql_resources: Vec = infra_map + .sql_resources + .keys() + .filter(|id| !actual_sql_resource_map.contains_key(*id)) + .cloned() + .collect(); + + debug!( + "Found {} missing SQL resources: {:?}", + missing_sql_resources.len(), + missing_sql_resources + ); + + // Find mismatched SQL resources (exist in both but differ) + let mut mismatched_sql_resources = Vec::new(); + for (id, desired) in &infra_map.sql_resources { + if let Some(actual) = actual_sql_resource_map.get(id) { + if actual != desired { + debug!("Found mismatch in SQL resource: {}", id); + mismatched_sql_resources.push(OlapChange::SqlResource(Change::Updated { + before: Box::new(actual.clone()), + after: Box::new(desired.clone()), + })); + } + } + } + + debug!( + "Found {} mismatched SQL resources", + mismatched_sql_resources.len() + ); + let discrepancies = InfraDiscrepancies { unmapped_tables, missing_tables, mismatched_tables, + unmapped_sql_resources, + missing_sql_resources, + mismatched_sql_resources, }; debug!( - "Reality check 
complete. Found {} unmapped, {} missing, and {} mismatched tables", + "Reality check complete. Found {} unmapped, {} missing, and {} mismatched tables, {} unmapped SQL resources, {} missing SQL resources, {} mismatched SQL resources", discrepancies.unmapped_tables.len(), discrepancies.missing_tables.len(), - discrepancies.mismatched_tables.len() + discrepancies.mismatched_tables.len(), + discrepancies.unmapped_sql_resources.len(), + discrepancies.missing_sql_resources.len(), + discrepancies.mismatched_sql_resources.len() ); if discrepancies.is_empty() { @@ -335,6 +435,7 @@ mod tests { // Mock OLAP client for testing struct MockOlapClient { tables: Vec<Table>, + sql_resources: Vec<SqlResource>, } #[async_trait] @@ -346,6 +447,17 @@ ) -> Result<(Vec<Table>
, Vec), OlapChangesError> { Ok((self.tables.clone(), vec![])) } + + async fn list_sql_resources( + &self, + _db_name: &str, + _default_database: &str, + ) -> Result< + Vec, + OlapChangesError, + > { + Ok(self.sql_resources.clone()) + } } // Helper function to create a test project @@ -407,7 +519,7 @@ mod tests { order_by: OrderBy::Fields(vec!["id".to_string()]), partition_by: None, sample_by: None, - engine: None, + engine: ClickhouseEngine::MergeTree, version: Some(Version::from_string("1.0.0".to_string())), source_primitive: PrimitiveSignature { name: "test".to_string(), @@ -420,6 +532,8 @@ mod tests { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, + primary_key_expression: None, } } @@ -434,6 +548,7 @@ mod tests { database: Some(DEFAULT_DATABASE_NAME.to_string()), ..table.clone() }], + sql_resources: vec![], }; // Create empty infrastructure map @@ -503,6 +618,7 @@ mod tests { database: Some(DEFAULT_DATABASE_NAME.to_string()), ..actual_table.clone() }], + sql_resources: vec![], }; let mut infra_map = InfrastructureMap { @@ -577,6 +693,7 @@ mod tests { database: Some(DEFAULT_DATABASE_NAME.to_string()), ..actual_table.clone() }], + sql_resources: vec![], }; let mut infra_map = InfrastructureMap { @@ -633,17 +750,18 @@ mod tests { let mut infra_table = create_base_table("test_table"); // Set different engine values - actual_table.engine = Some(ClickhouseEngine::ReplacingMergeTree { + actual_table.engine = ClickhouseEngine::ReplacingMergeTree { ver: None, is_deleted: None, - }); - infra_table.engine = None; + }; + infra_table.engine = ClickhouseEngine::MergeTree; let mock_client = MockOlapClient { tables: vec![Table { database: Some(DEFAULT_DATABASE_NAME.to_string()), ..actual_table.clone() }], + sql_resources: vec![], }; let mut infra_map = InfrastructureMap { @@ -680,12 +798,78 @@ mod tests { match &discrepancies.mismatched_tables[0] { OlapChange::Table(TableChange::Updated { before, after, .. }) => { assert!(matches!( - before.engine.as_ref(), - Some(ClickhouseEngine::ReplacingMergeTree { .. }) + &before.engine, + ClickhouseEngine::ReplacingMergeTree { .. 
} )); - assert_eq!(after.engine.as_ref(), None); + assert!(matches!(&after.engine, ClickhouseEngine::MergeTree)); } _ => panic!("Expected TableChange::Updated variant"), } } + + #[tokio::test] + async fn test_reality_checker_sql_resource_mismatch() { + let actual_resource = SqlResource { + name: "test_view".to_string(), + database: None, + setup: vec!["CREATE VIEW test_view AS SELECT 1".to_string()], + teardown: vec!["DROP VIEW test_view".to_string()], + pulls_data_from: vec![], + pushes_data_to: vec![], + }; + + let infra_resource = SqlResource { + name: "test_view".to_string(), + database: None, + setup: vec!["CREATE VIEW test_view AS SELECT 2".to_string()], // Difference here + teardown: vec!["DROP VIEW test_view".to_string()], + pulls_data_from: vec![], + pushes_data_to: vec![], + }; + + let mock_client = MockOlapClient { + tables: vec![], + sql_resources: vec![actual_resource.clone()], + }; + + let mut infra_map = InfrastructureMap { + default_database: DEFAULT_DATABASE_NAME.to_string(), + topics: HashMap::new(), + api_endpoints: HashMap::new(), + tables: HashMap::new(), + views: HashMap::new(), + topic_to_table_sync_processes: HashMap::new(), + topic_to_topic_sync_processes: HashMap::new(), + function_processes: HashMap::new(), + block_db_processes: OlapProcess {}, + consumption_api_web_server: ConsumptionApiWebServer {}, + orchestration_workers: HashMap::new(), + sql_resources: HashMap::new(), + workflows: HashMap::new(), + web_apps: HashMap::new(), + }; + + infra_map + .sql_resources + .insert(infra_resource.name.clone(), infra_resource.clone()); + + let checker = InfraRealityChecker::new(mock_client); + let project = create_test_project(); + + let discrepancies = checker.check_reality(&project, &infra_map).await.unwrap(); + + assert!(discrepancies.unmapped_sql_resources.is_empty()); + assert!(discrepancies.missing_sql_resources.is_empty()); + assert_eq!(discrepancies.mismatched_sql_resources.len(), 1); + + match &discrepancies.mismatched_sql_resources[0] { + OlapChange::SqlResource(Change::Updated { before, after }) => { + assert_eq!(before.name, "test_view"); + assert_eq!(after.name, "test_view"); + assert_eq!(before.setup[0], "CREATE VIEW test_view AS SELECT 1"); + assert_eq!(after.setup[0], "CREATE VIEW test_view AS SELECT 2"); + } + _ => panic!("Expected SqlResource Updated variant"), + } + } } diff --git a/apps/framework-cli/src/framework/core/infrastructure/sql_resource.rs b/apps/framework-cli/src/framework/core/infrastructure/sql_resource.rs index 5df774e6bf..f7c0b1cbd7 100644 --- a/apps/framework-cli/src/framework/core/infrastructure/sql_resource.rs +++ b/apps/framework-cli/src/framework/core/infrastructure/sql_resource.rs @@ -1,3 +1,4 @@ +use crate::infrastructure::olap::clickhouse::sql_parser::normalize_sql_for_comparison; use crate::proto::infrastructure_map::SqlResource as ProtoSqlResource; use serde::{Deserialize, Serialize}; @@ -9,11 +10,17 @@ use super::InfrastructureSignature; /// This struct holds information about a SQL resource, including its name, /// setup and teardown scripts, and its data lineage relationships with other /// infrastructure components. -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)] +#[derive(Debug, Serialize, Deserialize, Clone, Eq)] pub struct SqlResource { /// The unique name identifier for the SQL resource. pub name: String, + /// The database where this SQL resource exists. 
+ /// - None means use the default database + /// - Some(db) means the resource is in a specific database + #[serde(skip_serializing_if = "Option::is_none", default)] + pub database: Option, + /// A list of SQL commands or script paths executed during the setup phase. pub setup: Vec, /// A list of SQL commands or script paths executed during the teardown phase. @@ -28,10 +35,26 @@ pub struct SqlResource { } impl SqlResource { + /// Returns a unique identifier for this SQL resource. + /// + /// The ID format matches the table ID format: `{database}_{name}` + /// This ensures resources in different databases don't collide. + /// + /// # Arguments + /// * `default_database` - The default database name to use when `database` is None + /// + /// # Returns + /// A string in the format `{database}_{name}` + pub fn id(&self, default_database: &str) -> String { + let db = self.database.as_deref().unwrap_or(default_database); + format!("{}_{}", db, self.name) + } + /// Converts the `SqlResource` struct into its corresponding Protobuf representation. pub fn to_proto(&self) -> ProtoSqlResource { ProtoSqlResource { name: self.name.clone(), + database: self.database.clone(), setup: self.setup.clone(), teardown: self.teardown.clone(), special_fields: Default::default(), @@ -44,6 +67,7 @@ impl SqlResource { pub fn from_proto(proto: ProtoSqlResource) -> Self { Self { name: proto.name, + database: proto.database, setup: proto.setup, teardown: proto.teardown, pulls_data_from: proto @@ -74,3 +98,283 @@ impl DataLineage for SqlResource { self.pushes_data_to.clone() } } + +/// Custom PartialEq implementation that normalizes SQL statements before comparing. +/// This prevents false differences due to cosmetic formatting (whitespace, casing, backticks). +impl PartialEq for SqlResource { + fn eq(&self, other: &Self) -> bool { + // Name must match exactly + if self.name != other.name { + return false; + } + + // Database comparison: treat None as equivalent to any explicit database + // This allows resources from user code (database=None) to match introspected + // resources (database=Some("local")), since both resolve to the same ID + // We don't compare database here because the HashMap key already includes it + + // Data lineage must match exactly + if self.pulls_data_from != other.pulls_data_from + || self.pushes_data_to != other.pushes_data_to + { + return false; + } + + // Setup and teardown scripts must match after normalization + if self.setup.len() != other.setup.len() || self.teardown.len() != other.teardown.len() { + return false; + } + + for (self_sql, other_sql) in self.setup.iter().zip(other.setup.iter()) { + // Pass empty string for default_database since the comparison happens after HashMap + // lookup by ID (which includes database prefix). Both SQL statements are from the + // same database context, so we only need AST-based normalization (backticks, casing, + // whitespace) without database prefix stripping. User-defined SQL typically doesn't + // include explicit database prefixes (e.g., "FROM local.Table"). 
+ let self_normalized = normalize_sql_for_comparison(self_sql, ""); + let other_normalized = normalize_sql_for_comparison(other_sql, ""); + if self_normalized != other_normalized { + return false; + } + } + + for (self_sql, other_sql) in self.teardown.iter().zip(other.teardown.iter()) { + let self_normalized = normalize_sql_for_comparison(self_sql, ""); + let other_normalized = normalize_sql_for_comparison(other_sql, ""); + if self_normalized != other_normalized { + return false; + } + } + + true + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn create_test_resource(name: &str, setup: Vec<&str>, teardown: Vec<&str>) -> SqlResource { + SqlResource { + name: name.to_string(), + database: None, + setup: setup.into_iter().map(String::from).collect(), + teardown: teardown.into_iter().map(String::from).collect(), + pulls_data_from: vec![], + pushes_data_to: vec![], + } + } + + #[test] + fn test_sql_resource_equality_exact_match() { + let resource1 = create_test_resource( + "TestMV", + vec!["CREATE MATERIALIZED VIEW TestMV AS SELECT * FROM source"], + vec!["DROP VIEW IF EXISTS TestMV"], + ); + let resource2 = create_test_resource( + "TestMV", + vec!["CREATE MATERIALIZED VIEW TestMV AS SELECT * FROM source"], + vec!["DROP VIEW IF EXISTS TestMV"], + ); + + assert_eq!(resource1, resource2); + } + + #[test] + fn test_sql_resource_equality_with_case_differences() { + let resource_lowercase = create_test_resource( + "TestMV", + vec!["create view TestMV as select count(id) from users"], + vec!["drop view if exists TestMV"], + ); + let resource_uppercase = create_test_resource( + "TestMV", + vec!["CREATE VIEW TestMV AS SELECT COUNT(id) FROM users"], + vec!["DROP VIEW IF EXISTS TestMV"], + ); + + assert_eq!(resource_lowercase, resource_uppercase); + } + + #[test] + fn test_sql_resource_equality_comprehensive() { + // User-defined (from TypeScript/Python with backticks and formatting) + let user_defined = create_test_resource( + "BarAggregated_MV", + vec![ + "CREATE MATERIALIZED VIEW IF NOT EXISTS `BarAggregated_MV`\n TO `BarAggregated`\n AS SELECT\n count(`primaryKey`) as totalRows\n FROM `Bar`" + ], + vec!["DROP VIEW IF EXISTS `BarAggregated_MV`"], + ); + + // Introspected from ClickHouse (no backticks, single line, uppercase keywords) + let introspected = create_test_resource( + "BarAggregated_MV", + vec![ + "CREATE MATERIALIZED VIEW IF NOT EXISTS BarAggregated_MV TO BarAggregated AS SELECT COUNT(primaryKey) AS totalRows FROM Bar" + ], + vec!["DROP VIEW IF EXISTS `BarAggregated_MV`"], + ); + + assert_eq!(user_defined, introspected); + } + + #[test] + fn test_sql_resource_inequality_different_names() { + let resource1 = create_test_resource( + "MV1", + vec!["CREATE VIEW MV1 AS SELECT * FROM source"], + vec!["DROP VIEW IF EXISTS MV1"], + ); + let resource2 = create_test_resource( + "MV2", + vec!["CREATE VIEW MV2 AS SELECT * FROM source"], + vec!["DROP VIEW IF EXISTS MV2"], + ); + + assert_ne!(resource1, resource2); + } + + #[test] + fn test_sql_resource_inequality_different_sql() { + let resource1 = create_test_resource( + "TestMV", + vec!["CREATE VIEW TestMV AS SELECT col1 FROM table"], + vec!["DROP VIEW IF EXISTS TestMV"], + ); + let resource2 = create_test_resource( + "TestMV", + vec!["CREATE VIEW TestMV AS SELECT col2 FROM table"], + vec!["DROP VIEW IF EXISTS TestMV"], + ); + + assert_ne!(resource1, resource2); + } + + #[test] + fn test_sql_resource_inequality_different_data_lineage() { + let mut resource1 = create_test_resource( + "TestMV", + vec!["CREATE VIEW TestMV AS SELECT * FROM 
source"], + vec!["DROP VIEW IF EXISTS TestMV"], + ); + resource1.pulls_data_from = vec![InfrastructureSignature::Table { + id: "Table1".to_string(), + }]; + + let mut resource2 = create_test_resource( + "TestMV", + vec!["CREATE VIEW TestMV AS SELECT * FROM source"], + vec!["DROP VIEW IF EXISTS TestMV"], + ); + resource2.pulls_data_from = vec![InfrastructureSignature::Table { + id: "Table2".to_string(), + }]; + + assert_ne!(resource1, resource2); + } + + #[test] + fn test_sql_resource_equality_multiple_statements() { + let resource1 = create_test_resource( + "TestMV", + vec![ + "CREATE VIEW TestMV AS SELECT * FROM source", + "CREATE INDEX idx ON TestMV (col1)", + ], + vec!["DROP VIEW IF EXISTS TestMV"], + ); + let resource2 = create_test_resource( + "TestMV", + vec![ + "create view TestMV as select * from source", + "create index idx on TestMV (col1)", + ], + vec!["drop view if exists TestMV"], + ); + + assert_eq!(resource1, resource2); + } + + #[test] + fn test_sql_resource_id_with_database() { + // Test with explicit database + let resource_with_db = SqlResource { + name: "MyView".to_string(), + database: Some("custom".to_string()), + setup: vec![], + teardown: vec![], + pulls_data_from: vec![], + pushes_data_to: vec![], + }; + assert_eq!(resource_with_db.id("default"), "custom_MyView"); + + // Test with None database (uses default) + let resource_no_db = SqlResource { + name: "MyView".to_string(), + database: None, + setup: vec![], + teardown: vec![], + pulls_data_from: vec![], + pushes_data_to: vec![], + }; + assert_eq!(resource_no_db.id("default"), "default_MyView"); + } + + #[test] + fn test_sql_resource_equality_ignores_database_field() { + // Resources with different database fields should be equal if they have the same name + // This is because the HashMap key already includes the database, so we don't need to + // compare it during equality checks + let resource_no_db = SqlResource { + name: "MyView".to_string(), + database: None, + setup: vec!["CREATE VIEW MyView AS SELECT * FROM table1".to_string()], + teardown: vec!["DROP VIEW IF EXISTS MyView".to_string()], + pulls_data_from: vec![], + pushes_data_to: vec![], + }; + + let resource_with_db = SqlResource { + name: "MyView".to_string(), + database: Some("local".to_string()), + setup: vec!["CREATE VIEW MyView AS SELECT * FROM table1".to_string()], + teardown: vec!["DROP VIEW IF EXISTS MyView".to_string()], + pulls_data_from: vec![], + pushes_data_to: vec![], + }; + + // These should be equal because database is not compared in PartialEq + assert_eq!(resource_no_db, resource_with_db); + } + + #[test] + fn test_sql_resource_equality_with_normalized_sql() { + // Test that SQL normalization handles whitespace and formatting differences + let resource_formatted = SqlResource { + name: "TestView".to_string(), + database: None, + setup: vec![ + "CREATE VIEW IF NOT EXISTS TestView \n AS SELECT\n `primaryKey`,\n `utcTimestamp`,\n `textLength`\n FROM `Bar`\n WHERE `hasText` = true".to_string() + ], + teardown: vec!["DROP VIEW IF EXISTS `TestView`".to_string()], + pulls_data_from: vec![], + pushes_data_to: vec![], + }; + + let resource_compact = SqlResource { + name: "TestView".to_string(), + database: None, + setup: vec![ + "CREATE VIEW IF NOT EXISTS TestView AS SELECT primaryKey, utcTimestamp, textLength FROM Bar WHERE hasText = true".to_string() + ], + teardown: vec!["DROP VIEW IF EXISTS `TestView`".to_string()], + pulls_data_from: vec![], + pushes_data_to: vec![], + }; + + // These should be equal after SQL normalization + 
assert_eq!(resource_formatted, resource_compact); + } +} diff --git a/apps/framework-cli/src/framework/core/infrastructure/table.rs b/apps/framework-cli/src/framework/core/infrastructure/table.rs index f27f6bbcdf..52fdde9897 100644 --- a/apps/framework-cli/src/framework/core/infrastructure/table.rs +++ b/apps/framework-cli/src/framework/core/infrastructure/table.rs @@ -275,7 +275,7 @@ pub struct Table { #[serde(skip_serializing_if = "Option::is_none", default)] pub sample_by: Option, #[serde(default)] - pub engine: Option, + pub engine: ClickhouseEngine, pub version: Option, pub source_primitive: PrimitiveSignature, pub metadata: Option, @@ -299,6 +299,13 @@ pub struct Table { /// Table-level TTL expression (without leading 'TTL') #[serde(skip_serializing_if = "Option::is_none", default)] pub table_ttl_setting: Option, + /// Optional cluster name for ON CLUSTER support in ClickHouse + #[serde(skip_serializing_if = "Option::is_none", default)] + pub cluster_name: Option, + /// Optional PRIMARY KEY expression (overrides column-level primary_key flags when specified) + /// Allows for complex primary keys using functions or different column ordering + #[serde(skip_serializing_if = "Option::is_none", default)] + pub primary_key_expression: Option, } impl Table { @@ -332,20 +339,16 @@ impl Table { use sha2::{Digest, Sha256}; // Combine engine hash and database into a single hash - let engine_hash = self.engine.as_ref().map(|e| e.non_alterable_params_hash()); + let engine_hash = self.engine.non_alterable_params_hash(); - // If we have neither engine hash nor database, return None - if engine_hash.is_none() && self.database.is_none() { - return None; - } + // If we have no database, return None (engine always exists now) + self.database.as_ref()?; // Create a combined hash that includes both engine params and database let mut hasher = Sha256::new(); - // Include engine params hash if it exists - if let Some(ref hash) = engine_hash { - hasher.update(hash.as_bytes()); - } + // Include engine params hash + hasher.update(engine_hash.as_bytes()); // Include database field if let Some(ref db) = self.database { @@ -374,11 +377,7 @@ impl Table { .map(|c| format!("{}: {}", c.name, c.data_type)) .collect::>() .join(", "); - let engine_str = self - .engine - .as_ref() - .map(|e| format!(" - engine: {}", Into::::into(e.clone()))) - .unwrap_or_default(); + let engine_str = format!(" - engine: {}", Into::::into(self.engine.clone())); format!( "Table: {} Version {:?} - {} - {}{}", self.name, self.version, columns_str, self.order_by, engine_str @@ -407,19 +406,96 @@ impl Table { .collect() } + /// Returns a normalized representation of the primary key for comparison purposes. + /// + /// This handles both: + /// - `primary_key_expression`: Uses the expression directly + /// - Column-level `primary_key` flags: Builds an expression from column names + /// + /// The result is normalized (trimmed, spaces removed, backticks removed, and outer + /// parentheses stripped for single-element tuples) to enable semantic comparison. 
+ /// For example: + /// - `primary_key_expression: Some("(foo, bar)")` returns "(foo,bar)" + /// - Columns foo, bar with `primary_key: true` returns "(foo,bar)" + /// - `primary_key_expression: Some("foo")` returns "foo" + /// - `primary_key_expression: Some("(foo)")` returns "foo" (outer parens stripped) + /// - Single column foo with `primary_key: true` returns "foo" + pub fn normalized_primary_key_expr(&self) -> String { + let expr = if let Some(ref pk_expr) = self.primary_key_expression { + // Use the explicit primary_key_expression + pk_expr.clone() + } else { + // Build from column-level primary_key flags + let pk_cols = self.primary_key_columns(); + if pk_cols.is_empty() { + String::new() + } else if pk_cols.len() == 1 { + pk_cols[0].to_string() + } else { + format!("({})", pk_cols.join(", ")) + } + }; + + // Normalize: trim, remove backticks, remove spaces + let mut normalized = expr + .trim() + .trim_matches('`') + .replace('`', "") + .replace(" ", ""); + + // Strip outer parentheses if this is a single-element tuple + // E.g., "(col)" -> "col", "(cityHash64(col))" -> "cityHash64(col)" + // But keep "(col1,col2)" as-is + if normalized.starts_with('(') && normalized.ends_with(')') { + // Check if there are any top-level commas (not inside nested parentheses) + let inner = &normalized[1..normalized.len() - 1]; + let has_top_level_comma = { + let mut depth = 0; + let mut found_comma = false; + for ch in inner.chars() { + match ch { + '(' => depth += 1, + ')' => depth -= 1, + ',' if depth == 0 => { + found_comma = true; + break; + } + _ => {} + } + } + found_comma + }; + + // If no top-level comma, it's a single-element tuple - strip outer parens + if !has_top_level_comma { + normalized = inner.to_string(); + } + } + + normalized + } + + pub fn order_by_with_fallback(&self) -> OrderBy { + // table (in infra map created by older version of moose) may leave order_by unspecified, + // but the implicit order_by from primary keys can be the same + // ONLY for the MergeTree family + // S3 supports ORDER BY but does not auto set ORDER BY from PRIMARY KEY + // Buffer, S3Queue, and Distributed don't support ORDER BY + if self.order_by.is_empty() && self.engine.is_merge_tree_family() { + OrderBy::Fields( + self.primary_key_columns() + .iter() + .map(|c| c.to_string()) + .collect(), + ) + } else { + self.order_by.clone() + } + } + pub fn order_by_equals(&self, target: &Table) -> bool { self.order_by == target.order_by - // target may leave order_by unspecified, - // but the implicit order_by from primary keys can be the same - // ONLY for engines that support ORDER BY (MergeTree family and S3) - // Buffer, S3Queue, and Distributed don't support ORDER BY - // When engine is None, ClickHouse defaults to MergeTree - || (target.order_by.is_empty() - && target.engine.as_ref().is_none_or(|e| e.supports_order_by()) - && matches!( - &self.order_by, - OrderBy::Fields(v) if v.iter().map(String::as_str).collect::>() == target.primary_key_columns() - )) + || self.order_by_with_fallback() == target.order_by_with_fallback() } pub fn to_proto(&self) -> ProtoTable { @@ -456,14 +532,11 @@ impl Table { sample_by_expression: self.sample_by.clone(), version: self.version.as_ref().map(|v| v.to_string()), source_primitive: MessageField::some(self.source_primitive.to_proto()), - deduplicate: self - .engine - .as_ref() - .is_some_and(|e| matches!(e, ClickhouseEngine::ReplacingMergeTree { .. 
})), - engine: MessageField::from_option(self.engine.as_ref().map(|engine| StringValue { - value: engine.clone().to_proto_string(), + deduplicate: matches!(self.engine, ClickhouseEngine::ReplacingMergeTree { .. }), + engine: MessageField::some(StringValue { + value: self.engine.clone().to_proto_string(), special_fields: Default::default(), - })), + }), order_by2: MessageField::some(proto_order_by2), // Store the hash for change detection, including database field engine_params_hash: self @@ -472,6 +545,8 @@ impl Table { .or_else(|| self.compute_non_alterable_params_hash()), table_settings: self.table_settings.clone().unwrap_or_default(), table_ttl_setting: self.table_ttl_setting.clone(), + cluster_name: self.cluster_name.clone(), + primary_key_expression: self.primary_key_expression.clone(), metadata: MessageField::from_option(self.metadata.as_ref().map(|m| { infrastructure_map::Metadata { description: m.description.clone().unwrap_or_default(), @@ -509,7 +584,8 @@ impl Table { ver: None, is_deleted: None, }) - }); + }) + .unwrap_or(ClickhouseEngine::MergeTree); // Engine settings are now handled via table_settings field @@ -578,6 +654,8 @@ impl Table { .collect(), database: proto.database, table_ttl_setting: proto.table_ttl_setting, + cluster_name: proto.cluster_name, + primary_key_expression: proto.primary_key_expression, } } } @@ -1630,7 +1708,7 @@ mod tests { order_by: OrderBy::Fields(vec![]), partition_by: None, sample_by: None, - engine: None, + engine: ClickhouseEngine::MergeTree, version: None, source_primitive: PrimitiveSignature { name: "Users".to_string(), @@ -1643,6 +1721,8 @@ mod tests { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, + primary_key_expression: None, }; assert_eq!(table1.id(DEFAULT_DATABASE_NAME), "local_users"); @@ -1717,4 +1797,146 @@ mod tests { }; assert_eq!(table7.id(DEFAULT_DATABASE_NAME), "local_users_1_0"); } + + #[test] + fn test_order_by_equals_with_implicit_primary_key() { + use crate::framework::core::infrastructure_map::PrimitiveTypes; + + // Test case: actual table has empty order_by (implicit primary key), + // target table has explicit order_by that matches the primary key. + // This should be considered equal for MergeTree engines. 
+ + let columns = vec![ + Column { + name: "id".to_string(), + data_type: ColumnType::String, + required: true, + unique: false, + primary_key: true, + default: None, + annotations: vec![], + comment: None, + ttl: None, + }, + Column { + name: "name".to_string(), + data_type: ColumnType::String, + required: true, + unique: false, + primary_key: false, + default: None, + annotations: vec![], + comment: None, + ttl: None, + }, + ]; + + // Actual table from database: empty order_by (implicitly uses primary key) + let actual_table = Table { + name: "test_table".to_string(), + columns: columns.clone(), + order_by: OrderBy::Fields(vec![]), // Empty - will fall back to primary key + partition_by: None, + sample_by: None, + engine: ClickhouseEngine::MergeTree, + version: None, + source_primitive: PrimitiveSignature { + name: "test".to_string(), + primitive_type: PrimitiveTypes::DataModel, + }, + metadata: None, + life_cycle: LifeCycle::FullyManaged, + engine_params_hash: None, + table_settings: None, + indexes: vec![], + database: None, + table_ttl_setting: None, + cluster_name: None, + primary_key_expression: None, + }; + + // Target table from code: explicit order_by that matches primary key + let target_table = Table { + name: "test_table".to_string(), + columns: columns.clone(), + order_by: OrderBy::Fields(vec!["id".to_string()]), // Explicit order_by + partition_by: None, + sample_by: None, + engine: ClickhouseEngine::MergeTree, + version: None, + source_primitive: PrimitiveSignature { + name: "test".to_string(), + primitive_type: PrimitiveTypes::DataModel, + }, + metadata: None, + life_cycle: LifeCycle::FullyManaged, + engine_params_hash: None, + table_settings: None, + indexes: vec![], + database: None, + table_ttl_setting: None, + cluster_name: None, + primary_key_expression: None, + }; + + // These should be equal because: + // - actual_table has empty order_by but MergeTree engine + // - actual_table.order_by_with_fallback() returns ["id"] (from primary key) + // - target_table.order_by is ["id"] + // - target_table.order_by_with_fallback() returns ["id"] + // - ["id"] == ["id"] + assert!( + actual_table.order_by_equals(&target_table), + "actual table with empty order_by should equal target with explicit primary key order_by" + ); + + // Reverse direction should also work + assert!( + target_table.order_by_equals(&actual_table), + "target table with explicit primary key order_by should equal actual with empty order_by" + ); + + // Test with different order_by - should NOT be equal + let different_target = Table { + order_by: OrderBy::Fields(vec!["name".to_string()]), + ..target_table.clone() + }; + assert!( + !actual_table.order_by_equals(&different_target), + "tables with different order_by should not be equal" + ); + + // Test with non-MergeTree engine (S3) - empty order_by should stay empty + let actual_s3 = Table { + engine: ClickhouseEngine::S3 { + path: "s3://bucket/path".to_string(), + format: "Parquet".to_string(), + aws_access_key_id: None, + aws_secret_access_key: None, + compression: None, + partition_strategy: None, + partition_columns_in_data_file: None, + }, + ..actual_table.clone() + }; + + let target_s3 = Table { + engine: ClickhouseEngine::S3 { + path: "s3://bucket/path".to_string(), + format: "Parquet".to_string(), + aws_access_key_id: None, + aws_secret_access_key: None, + compression: None, + partition_strategy: None, + partition_columns_in_data_file: None, + }, + ..target_table.clone() + }; + + // For S3 engine, empty order_by doesn't fall back to primary key + 
assert!( + !actual_s3.order_by_equals(&target_s3), + "S3 engine should not infer order_by from primary key" + ); + } } diff --git a/apps/framework-cli/src/framework/core/infrastructure_map.rs b/apps/framework-cli/src/framework/core/infrastructure_map.rs index d5be6a00a4..9b4bec5831 100644 --- a/apps/framework-cli/src/framework/core/infrastructure_map.rs +++ b/apps/framework-cli/src/framework/core/infrastructure_map.rs @@ -494,7 +494,10 @@ fn default_database_name() -> String { /// /// The relationship between the components is maintained by reference rather than by value. /// Helper methods facilitate navigating the map and finding related components. -#[derive(Debug, Clone, Serialize, Deserialize)] +/// +/// Note: This type has a custom `Serialize` implementation that sorts all JSON keys +/// alphabetically for deterministic output in version-controlled migration files. +#[derive(Debug, Clone, Deserialize)] pub struct InfrastructureMap { #[serde(default = "default_database_name")] pub default_database: String, @@ -681,18 +684,18 @@ impl InfrastructureMap { } } None => { - log::error!( + tracing::error!( "Could not find previous version with no change for data model: {} {}", data_model.name, data_model.version ); - log::debug!("Data Models Dump: {:?}", primitive_map.datamodels); + tracing::debug!("Data Models Dump: {:?}", primitive_map.datamodels); } } } if !project.features.streaming_engine && !primitive_map.functions.is_empty() { - log::error!("Streaming disabled. Functions are disabled."); + tracing::error!("Streaming disabled. Functions are disabled."); show_message_wrapper( MessageType::Error, Message { @@ -720,7 +723,7 @@ impl InfrastructureMap { // consumption api endpoints let consumption_api_web_server = ConsumptionApiWebServer {}; if !project.features.apis && !primitive_map.consumption.endpoint_files.is_empty() { - log::error!("Analytics APIs disabled. API endpoints will not be available."); + tracing::error!("Analytics APIs disabled. 
API endpoints will not be available."); show_message_wrapper( MessageType::Error, Message { @@ -966,7 +969,7 @@ impl InfrastructureMap { ); // Tables (using custom strategy) - log::info!("Analyzing changes in Tables..."); + tracing::info!("Analyzing changes in Tables..."); let olap_changes_len_before = changes.olap_changes.len(); Self::diff_tables_with_strategy( &self.tables, @@ -978,13 +981,13 @@ impl InfrastructureMap { ignore_ops, ); let table_changes = changes.olap_changes.len() - olap_changes_len_before; - log::info!("Table changes detected: {}", table_changes); + tracing::info!("Table changes detected: {}", table_changes); // Views Self::diff_views(&self.views, &target_map.views, &mut changes.olap_changes); // SQL Resources (needs tables context for MV population detection) - log::info!("Analyzing changes in SQL Resources..."); + tracing::info!("Analyzing changes in SQL Resources..."); let olap_changes_len_before = changes.olap_changes.len(); Self::diff_sql_resources( &self.sql_resources, @@ -994,13 +997,13 @@ impl InfrastructureMap { &mut changes.olap_changes, ); let sql_resource_changes = changes.olap_changes.len() - olap_changes_len_before; - log::info!("SQL Resource changes detected: {}", sql_resource_changes); + tracing::info!("SQL Resource changes detected: {}", sql_resource_changes); // All process types self.diff_all_processes(target_map, &mut changes.processes_changes); // Summary - log::info!( + tracing::info!( "Total changes detected - OLAP: {}, Processes: {}, API: {}, WebApps: {}, Streaming: {}", changes.olap_changes.len(), changes.processes_changes.len(), @@ -1030,7 +1033,7 @@ impl InfrastructureMap { streaming_changes: &mut Vec, respect_life_cycle: bool, ) -> (usize, usize, usize) { - log::info!("Analyzing changes in Topics..."); + tracing::info!("Analyzing changes in Topics..."); let mut topic_updates = 0; let mut topic_removals = 0; let mut topic_additions = 0; @@ -1041,12 +1044,12 @@ impl InfrastructureMap { // Respect lifecycle: ExternallyManaged topics are never modified if target_topic.life_cycle == LifeCycle::ExternallyManaged && respect_life_cycle { - log::debug!( + tracing::debug!( "Topic '{}' has changes but is externally managed - skipping update", topic.name ); } else { - log::debug!("Topic updated: {} ({})", topic.name, id); + tracing::debug!("Topic updated: {} ({})", topic.name, id); topic_updates += 1; streaming_changes.push(StreamingChange::Topic(Change::::Updated { before: Box::new(topic.clone()), @@ -1058,20 +1061,20 @@ impl InfrastructureMap { // Respect lifecycle: DeletionProtected and ExternallyManaged topics are never removed match (topic.life_cycle, respect_life_cycle) { (LifeCycle::FullyManaged, _) | (_, false) => { - log::debug!("Topic removed: {} ({})", topic.name, id); + tracing::debug!("Topic removed: {} ({})", topic.name, id); topic_removals += 1; streaming_changes.push(StreamingChange::Topic(Change::::Removed( Box::new(topic.clone()), ))); } (LifeCycle::DeletionProtected, true) => { - log::debug!( + tracing::debug!( "Topic '{}' marked for removal but is deletion-protected - skipping removal", topic.name ); } (LifeCycle::ExternallyManaged, true) => { - log::debug!( + tracing::debug!( "Topic '{}' marked for removal but is externally managed - skipping removal", topic.name ); @@ -1084,12 +1087,12 @@ impl InfrastructureMap { if !self_topics.contains_key(id) { // Respect lifecycle: ExternallyManaged topics are never added automatically if topic.life_cycle == LifeCycle::ExternallyManaged && respect_life_cycle { - log::debug!( + tracing::debug!( 
"Topic '{}' marked for addition but is externally managed - skipping addition", topic.name ); } else { - log::debug!("Topic added: {} ({})", topic.name, id); + tracing::debug!("Topic added: {} ({})", topic.name, id); topic_additions += 1; streaming_changes.push(StreamingChange::Topic(Change::::Added( Box::new(topic.clone()), @@ -1098,7 +1101,7 @@ impl InfrastructureMap { } } - log::info!( + tracing::info!( "Topic changes: {} added, {} removed, {} updated", topic_additions, topic_removals, @@ -1125,7 +1128,7 @@ impl InfrastructureMap { target_endpoints: &HashMap, api_changes: &mut Vec, ) -> (usize, usize, usize) { - log::info!("Analyzing changes in API Endpoints..."); + tracing::info!("Analyzing changes in API Endpoints..."); let mut endpoint_updates = 0; let mut endpoint_removals = 0; let mut endpoint_additions = 0; @@ -1133,7 +1136,7 @@ impl InfrastructureMap { for (id, endpoint) in self_endpoints { if let Some(target_endpoint) = target_endpoints.get(id) { if !api_endpoints_equal_ignore_metadata(endpoint, target_endpoint) { - log::debug!("API Endpoint updated: {}", id); + tracing::debug!("API Endpoint updated: {}", id); endpoint_updates += 1; api_changes.push(ApiChange::ApiEndpoint(Change::::Updated { before: Box::new(endpoint.clone()), @@ -1141,7 +1144,7 @@ impl InfrastructureMap { })); } } else { - log::debug!("API Endpoint removed: {}", id); + tracing::debug!("API Endpoint removed: {}", id); endpoint_removals += 1; api_changes.push(ApiChange::ApiEndpoint(Change::::Removed( Box::new(endpoint.clone()), @@ -1151,7 +1154,7 @@ impl InfrastructureMap { for (id, endpoint) in target_endpoints { if !self_endpoints.contains_key(id) { - log::debug!("API Endpoint added: {}", id); + tracing::debug!("API Endpoint added: {}", id); endpoint_additions += 1; api_changes.push(ApiChange::ApiEndpoint(Change::::Added( Box::new(endpoint.clone()), @@ -1159,7 +1162,7 @@ impl InfrastructureMap { } } - log::info!( + tracing::info!( "API Endpoint changes: {} added, {} removed, {} updated", endpoint_additions, endpoint_removals, @@ -1186,7 +1189,7 @@ impl InfrastructureMap { target_web_apps: &HashMap, web_app_changes: &mut Vec, ) -> (usize, usize, usize) { - log::info!("Analyzing changes in WebApps..."); + tracing::info!("Analyzing changes in WebApps..."); let mut webapp_updates = 0; let mut webapp_removals = 0; let mut webapp_additions = 0; @@ -1194,7 +1197,7 @@ impl InfrastructureMap { for (id, webapp) in self_web_apps { if let Some(target_webapp) = target_web_apps.get(id) { if webapp != target_webapp { - log::debug!("WebApp updated: {}", id); + tracing::debug!("WebApp updated: {}", id); webapp_updates += 1; web_app_changes.push(WebAppChange::WebApp(Change::Updated { before: Box::new(webapp.clone()), @@ -1202,7 +1205,7 @@ impl InfrastructureMap { })); } } else { - log::debug!("WebApp removed: {}", id); + tracing::debug!("WebApp removed: {}", id); webapp_removals += 1; web_app_changes.push(WebAppChange::WebApp(Change::Removed(Box::new( webapp.clone(), @@ -1212,7 +1215,7 @@ impl InfrastructureMap { for (id, webapp) in target_web_apps { if !self_web_apps.contains_key(id) { - log::debug!("WebApp added: {}", id); + tracing::debug!("WebApp added: {}", id); webapp_additions += 1; web_app_changes.push(WebAppChange::WebApp(Change::Added(Box::new( webapp.clone(), @@ -1220,7 +1223,7 @@ impl InfrastructureMap { } } - log::info!( + tracing::info!( "WebApp changes: {} added, {} removed, {} updated", webapp_additions, webapp_removals, @@ -1247,7 +1250,7 @@ impl InfrastructureMap { target_views: &HashMap, olap_changes: &mut 
Vec, ) -> (usize, usize, usize) { - log::info!("Analyzing changes in Views..."); + tracing::info!("Analyzing changes in Views..."); let mut view_updates = 0; let mut view_removals = 0; let mut view_additions = 0; @@ -1256,7 +1259,7 @@ impl InfrastructureMap { for (id, view) in self_views { if let Some(target_view) = target_views.get(id) { if view != target_view { - log::debug!("View updated: {} ({})", view.name, id); + tracing::debug!("View updated: {} ({})", view.name, id); view_updates += 1; olap_changes.push(OlapChange::View(Change::Updated { before: Box::new(view.clone()), @@ -1264,7 +1267,7 @@ impl InfrastructureMap { })); } } else { - log::debug!("View removed: {} ({})", view.name, id); + tracing::debug!("View removed: {} ({})", view.name, id); view_removals += 1; olap_changes.push(OlapChange::View(Change::Removed(Box::new(view.clone())))); } @@ -1273,13 +1276,13 @@ impl InfrastructureMap { // Check for additions for (id, view) in target_views { if !self_views.contains_key(id) { - log::debug!("View added: {} ({})", view.name, id); + tracing::debug!("View added: {} ({})", view.name, id); view_additions += 1; olap_changes.push(OlapChange::View(Change::Added(Box::new(view.clone())))); } } - log::info!( + tracing::info!( "View changes: {} added, {} removed, {} updated", view_additions, view_removals, @@ -1340,7 +1343,7 @@ impl InfrastructureMap { target_processes: &HashMap, process_changes: &mut Vec, ) -> (usize, usize, usize) { - log::info!("Analyzing changes in Topic-to-Table Sync Processes..."); + tracing::info!("Analyzing changes in Topic-to-Table Sync Processes..."); let mut process_updates = 0; let mut process_removals = 0; let mut process_additions = 0; @@ -1348,7 +1351,7 @@ impl InfrastructureMap { for (id, process) in self_processes { if let Some(target_process) = target_processes.get(id) { if process != target_process { - log::debug!("TopicToTableSyncProcess updated: {}", id); + tracing::debug!("TopicToTableSyncProcess updated: {}", id); process_updates += 1; process_changes.push(ProcessChange::TopicToTableSyncProcess(Change::< TopicToTableSyncProcess, @@ -1358,7 +1361,7 @@ impl InfrastructureMap { })); } } else { - log::debug!("TopicToTableSyncProcess removed: {}", id); + tracing::debug!("TopicToTableSyncProcess removed: {}", id); process_removals += 1; process_changes.push(ProcessChange::TopicToTableSyncProcess(Change::< TopicToTableSyncProcess, @@ -1370,7 +1373,7 @@ impl InfrastructureMap { for (id, process) in target_processes { if !self_processes.contains_key(id) { - log::debug!("TopicToTableSyncProcess added: {}", id); + tracing::debug!("TopicToTableSyncProcess added: {}", id); process_additions += 1; process_changes.push(ProcessChange::TopicToTableSyncProcess(Change::< TopicToTableSyncProcess, @@ -1380,7 +1383,7 @@ impl InfrastructureMap { } } - log::info!( + tracing::info!( "Topic-to-Table Sync Process changes: {} added, {} removed, {} updated", process_additions, process_removals, @@ -1396,7 +1399,7 @@ impl InfrastructureMap { target_processes: &HashMap, process_changes: &mut Vec, ) -> (usize, usize, usize) { - log::info!("Analyzing changes in Topic-to-Topic Sync Processes..."); + tracing::info!("Analyzing changes in Topic-to-Topic Sync Processes..."); let mut process_updates = 0; let mut process_removals = 0; let mut process_additions = 0; @@ -1404,7 +1407,7 @@ impl InfrastructureMap { for (id, process) in self_processes { if let Some(target_process) = target_processes.get(id) { if process != target_process { - log::debug!("TopicToTopicSyncProcess updated: {}", id); + 
tracing::debug!("TopicToTopicSyncProcess updated: {}", id); process_updates += 1; process_changes.push(ProcessChange::TopicToTopicSyncProcess(Change::< TopicToTopicSyncProcess, @@ -1414,7 +1417,7 @@ impl InfrastructureMap { })); } } else { - log::debug!("TopicToTopicSyncProcess removed: {}", id); + tracing::debug!("TopicToTopicSyncProcess removed: {}", id); process_removals += 1; process_changes.push(ProcessChange::TopicToTopicSyncProcess(Change::< TopicToTopicSyncProcess, @@ -1426,7 +1429,7 @@ impl InfrastructureMap { for (id, process) in target_processes { if !self_processes.contains_key(id) { - log::debug!("TopicToTopicSyncProcess added: {}", id); + tracing::debug!("TopicToTopicSyncProcess added: {}", id); process_additions += 1; process_changes.push(ProcessChange::TopicToTopicSyncProcess(Change::< TopicToTopicSyncProcess, @@ -1436,7 +1439,7 @@ impl InfrastructureMap { } } - log::info!( + tracing::info!( "Topic-to-Topic Sync Process changes: {} added, {} removed, {} updated", process_additions, process_removals, @@ -1452,7 +1455,7 @@ impl InfrastructureMap { target_processes: &HashMap, process_changes: &mut Vec, ) -> (usize, usize, usize) { - log::info!("Analyzing changes in Function Processes..."); + tracing::info!("Analyzing changes in Function Processes..."); let mut process_updates = 0; let mut process_removals = 0; let mut process_additions = 0; @@ -1461,7 +1464,7 @@ impl InfrastructureMap { if let Some(target_process) = target_processes.get(id) { // Always treat function processes as updated if they exist in both maps // This ensures function code changes are always redeployed - log::debug!("FunctionProcess updated (forced): {}", id); + tracing::debug!("FunctionProcess updated (forced): {}", id); process_updates += 1; process_changes.push(ProcessChange::FunctionProcess( Change::::Updated { @@ -1470,7 +1473,7 @@ impl InfrastructureMap { }, )); } else { - log::debug!("FunctionProcess removed: {}", id); + tracing::debug!("FunctionProcess removed: {}", id); process_removals += 1; process_changes.push(ProcessChange::FunctionProcess( Change::::Removed(Box::new(process.clone())), @@ -1480,7 +1483,7 @@ impl InfrastructureMap { for (id, process) in target_processes { if !self_processes.contains_key(id) { - log::debug!("FunctionProcess added: {}", id); + tracing::debug!("FunctionProcess added: {}", id); process_additions += 1; process_changes.push(ProcessChange::FunctionProcess( Change::::Added(Box::new(process.clone())), @@ -1488,7 +1491,7 @@ impl InfrastructureMap { } } - log::info!( + tracing::info!( "Function Process changes: {} added, {} removed, {} updated", process_additions, process_removals, @@ -1504,11 +1507,11 @@ impl InfrastructureMap { target_process: &OlapProcess, process_changes: &mut Vec, ) { - log::info!("Analyzing changes in OLAP processes..."); + tracing::info!("Analyzing changes in OLAP processes..."); // Currently we assume there is always a change and restart the processes // TODO: Once we refactor to have multiple processes, we should compare actual changes - log::debug!("OLAP Process updated (assumed for now)"); + tracing::debug!("OLAP Process updated (assumed for now)"); process_changes.push(ProcessChange::OlapProcess(Change::::Updated { before: Box::new(self_process.clone()), after: Box::new(target_process.clone()), @@ -1521,11 +1524,11 @@ impl InfrastructureMap { target_process: &ConsumptionApiWebServer, process_changes: &mut Vec, ) { - log::info!("Analyzing changes in Analytics API processes..."); + tracing::info!("Analyzing changes in Analytics API 
processes..."); // We are currently not tracking individual consumption endpoints, so we will just restart // the consumption web server when something changed - log::debug!("Analytics API Web Server updated (assumed for now)"); + tracing::debug!("Analytics API Web Server updated (assumed for now)"); process_changes.push(ProcessChange::ConsumptionApiWebServer(Change::< ConsumptionApiWebServer, >::Updated { @@ -1540,7 +1543,7 @@ impl InfrastructureMap { target_workers: &HashMap, process_changes: &mut Vec, ) -> (usize, usize, usize) { - log::info!("Analyzing changes in Orchestration Workers..."); + tracing::info!("Analyzing changes in Orchestration Workers..."); let mut worker_updates = 0; let mut worker_removals = 0; let mut worker_additions = 0; @@ -1548,7 +1551,7 @@ impl InfrastructureMap { for (id, worker) in self_workers { if let Some(target_worker) = target_workers.get(id) { // Always treat workers as updated to ensure redeployment - log::debug!( + tracing::debug!( "OrchestrationWorker updated (forced): {} ({})", id, worker.supported_language @@ -1561,7 +1564,7 @@ impl InfrastructureMap { after: Box::new(target_worker.clone()), })); } else { - log::debug!( + tracing::debug!( "OrchestrationWorker removed: {} ({})", id, worker.supported_language @@ -1577,7 +1580,7 @@ impl InfrastructureMap { for (id, worker) in target_workers { if !self_workers.contains_key(id) { - log::debug!( + tracing::debug!( "OrchestrationWorker added: {} ({})", id, worker.supported_language @@ -1591,7 +1594,7 @@ impl InfrastructureMap { } } - log::info!( + tracing::info!( "Orchestration Worker changes: {} added, {} removed, {} updated", worker_additions, worker_removals, @@ -1622,7 +1625,7 @@ impl InfrastructureMap { is_production: bool, olap_changes: &mut Vec, ) { - log::info!( + tracing::info!( "Analyzing SQL resource differences between {} source resources and {} target resources", self_sql_resources.len(), target_sql_resources.len() @@ -1636,7 +1639,7 @@ impl InfrastructureMap { if let Some(target_sql_resource) = target_sql_resources.get(id) { if sql_resource != target_sql_resource { // TODO: if only the teardown code changed, we should not need to execute any changes - log::debug!("SQL resource '{}' has differences", id); + tracing::debug!("SQL resource '{}' has differences", id); sql_resource_updates += 1; olap_changes.push(OlapChange::SqlResource(Change::Updated { before: Box::new(sql_resource.clone()), @@ -1654,7 +1657,7 @@ impl InfrastructureMap { ); } } else { - log::debug!("SQL resource '{}' removed", id); + tracing::debug!("SQL resource '{}' removed", id); sql_resource_removals += 1; olap_changes.push(OlapChange::SqlResource(Change::Removed(Box::new( sql_resource.clone(), @@ -1664,7 +1667,7 @@ impl InfrastructureMap { for (id, sql_resource) in target_sql_resources { if !self_sql_resources.contains_key(id) { - log::debug!("SQL resource '{}' added", id); + tracing::debug!("SQL resource '{}' added", id); sql_resource_additions += 1; olap_changes.push(OlapChange::SqlResource(Change::Added(Box::new( sql_resource.clone(), @@ -1682,7 +1685,7 @@ impl InfrastructureMap { } } - log::info!( + tracing::info!( "SQL resource changes: {} added, {} removed, {} updated", sql_resource_additions, sql_resource_removals, @@ -1716,7 +1719,7 @@ impl InfrastructureMap { default_database: &str, ignore_ops: &[crate::infrastructure::olap::clickhouse::IgnorableOperation], ) { - log::info!( + tracing::info!( "Analyzing table differences between {} source tables and {} target tables", self_tables.len(), target_tables.len() @@ 
-1724,7 +1727,7 @@ impl InfrastructureMap { // Normalize tables for comparison if ignore_ops is provided let (normalized_self, normalized_target) = if !ignore_ops.is_empty() { - log::info!( + tracing::info!( "Normalizing tables before comparison. Ignore list: {:?}", ignore_ops ); @@ -1760,7 +1763,8 @@ impl InfrastructureMap { let mut table_additions = 0; // Use normalized tables for comparison, but original tables for changes - for normalized_table in normalized_self.values() { + // Iterate over key-value pairs to preserve the HashMap key for lookups + for (key, normalized_table) in normalized_self.iter() { // self_tables can be from remote where the keys are IDs with another database prefix // but they are then the default database, // the `database` field is None and we build the ID ourselves @@ -1768,17 +1772,18 @@ impl InfrastructureMap { normalized_target.get(&normalized_table.id(default_database)) { if !tables_equal_ignore_metadata(normalized_table, normalized_target) { - // Get original tables for use in changes + // Get original tables for use in changes using the HashMap key + // not the computed ID, since remote keys may differ from computed IDs let table = self_tables - .get(&normalized_table.id(default_database)) - .unwrap(); + .get(key) + .expect("normalized_self and self_tables should have same keys"); let target_table = target_tables .get(&normalized_target.id(default_database)) - .unwrap(); + .expect("normalized_target exists, so target_table should too"); // Respect lifecycle: ExternallyManaged tables are never modified if target_table.life_cycle == LifeCycle::ExternallyManaged && respect_life_cycle { - log::debug!( + tracing::debug!( "Table '{}' has changes but is externally managed - skipping update", table.name ); @@ -1793,7 +1798,7 @@ impl InfrastructureMap { let original_len = column_changes.len(); column_changes.retain(|change| match change { ColumnChange::Removed(_) => { - log::debug!( + tracing::debug!( "Filtering out column removal for deletion-protected table '{}'", table.name ); @@ -1804,7 +1809,7 @@ impl InfrastructureMap { }); if original_len != column_changes.len() { - log::info!( + tracing::info!( "Filtered {} destructive column changes for deletion-protected table '{}'", original_len - column_changes.len(), table.name @@ -1821,39 +1826,16 @@ impl InfrastructureMap { before: normalized_table.partition_by.clone(), after: normalized_target.partition_by.clone(), }; - - // Compute ORDER BY changes - fn order_by_from_primary_key(target_table: &Table) -> Vec { - target_table - .columns - .iter() - .filter_map(|c| { - if c.primary_key { - Some(c.name.clone()) - } else { - None - } - }) - .collect() - } - - let order_by_changed = table.order_by != target_table.order_by - // target may leave order_by unspecified, - // but the implicit order_by from primary keys can be the same - // ONLY for engines that support ORDER BY (MergeTree family and S3) - // Buffer, S3Queue, and Distributed don't support ORDER BY - // When engine is None, ClickHouse defaults to MergeTree - && !(target_table.order_by.is_empty() - && target_table.engine.as_ref().is_none_or(|e| e.supports_order_by()) - && matches!( - &table.order_by, - OrderBy::Fields(v) - if *v == order_by_from_primary_key(target_table) - )); + let order_by_changed = !table.order_by_equals(target_table); // Detect engine change (e.g., MergeTree -> ReplacingMergeTree) let engine_changed = table.engine != target_table.engine; + // Note: We intentionally do NOT check for cluster_name changes here. 
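As an aside on the `order_by_equals` helper introduced here: judging by the inline comparison it replaces, the intent is to treat an unspecified target ORDER BY as equal to the ordering implied by the primary-key columns, but only for engines that support ORDER BY at all. A standalone sketch of that rule, using simplified stand-in types rather than the crate's `Table`/`OrderBy` (`engine_supports_order_by` here stands in for `engine.supports_order_by()`):

```rust
// Simplified stand-ins, not the crate's types.
#[derive(PartialEq)]
enum OrderBy {
    Fields(Vec<String>),
}

impl OrderBy {
    fn is_empty(&self) -> bool {
        matches!(self, OrderBy::Fields(f) if f.is_empty())
    }
}

struct Column {
    name: String,
    primary_key: bool,
}

struct Table {
    columns: Vec<Column>,
    order_by: OrderBy,
    engine_supports_order_by: bool, // stand-in for engine.supports_order_by()
}

impl Table {
    // Implicit ORDER BY derived from primary-key columns, in declaration order.
    fn order_by_from_primary_key(&self) -> Vec<String> {
        self.columns
            .iter()
            .filter(|c| c.primary_key)
            .map(|c| c.name.clone())
            .collect()
    }

    // Equal when the clauses match literally, or when the target leaves ORDER BY
    // unspecified, the engine supports ORDER BY, and the source ordering is
    // exactly the implicit primary-key ordering.
    fn order_by_equals(&self, target: &Table) -> bool {
        if self.order_by == target.order_by {
            return true;
        }
        target.order_by.is_empty()
            && target.engine_supports_order_by
            && matches!(
                &self.order_by,
                OrderBy::Fields(fields) if *fields == target.order_by_from_primary_key()
            )
    }
}
```

Extracting the rule into one helper keeps the two call sites in this file (the remote-plan diff and the local diff) from drifting apart, which is what the duplicated inline blocks had started to do.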
+ // cluster_name is a deployment directive (how to run DDL), not a schema property. + // The inframap will be updated with the new cluster_name value, and future DDL + // operations will use it, but changing cluster_name doesn't trigger operations. + let order_by_change = if order_by_changed { OrderByChange { before: table.order_by.clone(), @@ -1875,7 +1857,7 @@ impl InfrastructureMap { &table.table_ttl_setting, &target_table.table_ttl_setting, ) { - log::debug!( + tracing::debug!( "Table '{}' has table-level TTL change: {:?} -> {:?}", table.name, table.table_ttl_setting, @@ -1894,6 +1876,8 @@ impl InfrastructureMap { // since ClickHouse requires the full column definition when modifying TTL // Only process changes if there are actual differences to report + // Note: cluster_name changes are intentionally excluded - they don't trigger operations + // TODO: table_settings is not checked in the if condition, but checked by ClickHouseTableDiffStrategy if !column_changes.is_empty() || order_by_changed || partition_by_changed @@ -1919,25 +1903,25 @@ impl InfrastructureMap { } } } else { - // Get original table for removal + // Get original table for removal using the HashMap key let table = self_tables - .get(&normalized_table.id(default_database)) - .unwrap(); + .get(key) + .expect("normalized_self and self_tables should have same keys"); // Respect lifecycle: DeletionProtected and ExternallyManaged tables are never removed match (table.life_cycle, respect_life_cycle) { (LifeCycle::FullyManaged, _) | (_, false) => { - log::debug!("Table '{}' removed", table.name); + tracing::debug!("Table '{}' removed", table.name); table_removals += 1; olap_changes.push(OlapChange::Table(TableChange::Removed(table.clone()))); } (LifeCycle::DeletionProtected, true) => { - log::debug!( + tracing::debug!( "Table '{}' marked for removal but is deletion-protected - skipping removal", table.name ); } (LifeCycle::ExternallyManaged, true) => { - log::debug!( + tracing::debug!( "Table '{}' marked for removal but is externally managed - skipping removal", table.name ); @@ -1951,18 +1935,18 @@ impl InfrastructureMap { if find_table_from_infra_map(table, &normalized_self, default_database).is_none() { // Respect lifecycle: ExternallyManaged tables are never added automatically if table.life_cycle == LifeCycle::ExternallyManaged && respect_life_cycle { - log::debug!( + tracing::debug!( "Table '{}' marked for addition but is externally managed - skipping addition", table.name ); } else { - log::debug!( + tracing::debug!( "Table '{}' added with {} columns", table.name, table.columns.len() ); for col in &table.columns { - log::trace!(" - Column: {} ({})", col.name, col.data_type); + tracing::trace!(" - Column: {} ({})", col.name, col.data_type); } table_additions += 1; olap_changes.push(OlapChange::Table(TableChange::Added(table.clone()))); @@ -1970,7 +1954,7 @@ impl InfrastructureMap { } } - log::info!( + tracing::info!( "Table changes: {} added, {} removed, {} updated", table_additions, table_removals, @@ -2100,7 +2084,7 @@ impl InfrastructureMap { let original_len = column_changes.len(); column_changes.retain(|change| match change { ColumnChange::Removed(_) => { - log::debug!( + tracing::debug!( "Filtering out column removal for deletion-protected table '{}'", table.name ); @@ -2111,7 +2095,7 @@ impl InfrastructureMap { }); if original_len != column_changes.len() { - log::info!( + tracing::info!( "Filtered {} destructive column changes for deletion-protected table '{}'", original_len - column_changes.len(), table.name @@ 
-2119,33 +2103,7 @@ impl InfrastructureMap { } } - fn order_by_from_primary_key(target_table: &Table) -> Vec { - target_table - .columns - .iter() - .filter_map(|c| { - if c.primary_key { - Some(c.name.clone()) - } else { - None - } - }) - .collect() - } - - let order_by_changed = table.order_by != target_table.order_by - // target may leave order_by unspecified, - // but the implicit order_by from primary keys can be the same - // ONLY for engines that support ORDER BY (MergeTree family and S3) - // Buffer, S3Queue, and Distributed don't support ORDER BY - // When engine is None, ClickHouse defaults to MergeTree - && !(target_table.order_by.is_empty() - && target_table.engine.as_ref().is_none_or(|e| e.supports_order_by()) - && matches!( - &table.order_by, - crate::framework::core::infrastructure::table::OrderBy::Fields(v) - if *v == order_by_from_primary_key(target_table) - )); + let order_by_changed = !table.order_by_equals(target_table); let order_by_change = if order_by_changed { OrderByChange { @@ -2237,81 +2195,75 @@ impl InfrastructureMap { for table in self.tables.values_mut() { let mut should_recalc_hash = false; - if let Some(engine) = &mut table.engine { - match engine { - ClickhouseEngine::S3Queue { - aws_access_key_id, - aws_secret_access_key, - .. - } => { - // Resolve environment variable markers for AWS credentials - let resolved_access_key = resolve_optional_runtime_env(aws_access_key_id) - .map_err(|e| { - format!( - "Failed to resolve runtime environment variable for table '{}' field 'awsAccessKeyId': {}", - table.name, e - ) - })?; - - let resolved_secret_key = - resolve_optional_runtime_env(aws_secret_access_key).map_err(|e| { - format!( - "Failed to resolve runtime environment variable for table '{}' field 'awsSecretAccessKey': {}", - table.name, e - ) - })?; - - *aws_access_key_id = resolved_access_key; - *aws_secret_access_key = resolved_secret_key; - should_recalc_hash = true; - - log::debug!( - "Resolved S3Queue credentials for table '{}' at runtime", - table.name - ); - } - ClickhouseEngine::S3 { - aws_access_key_id, - aws_secret_access_key, - .. 
- } => { - // Resolve environment variable markers for AWS credentials - let resolved_access_key = resolve_optional_runtime_env(aws_access_key_id) - .map_err(|e| { - format!( - "Failed to resolve runtime environment variable for table '{}' field 'awsAccessKeyId': {}", - table.name, e - ) - })?; - - let resolved_secret_key = - resolve_optional_runtime_env(aws_secret_access_key).map_err(|e| { - format!( - "Failed to resolve runtime environment variable for table '{}' field 'awsSecretAccessKey': {}", - table.name, e - ) - })?; - - *aws_access_key_id = resolved_access_key; - *aws_secret_access_key = resolved_secret_key; - should_recalc_hash = true; - - log::debug!( - "Resolved S3 credentials for table '{}' at runtime", - table.name - ); - } - _ => { - // No credentials to resolve for other engine types - } + // Helper closure to resolve AWS credentials for S3-based engines + let resolve_aws_credentials = |access_key: &mut Option, + secret_key: &mut Option, + engine_name: &str| + -> Result<(), String> { + let resolved_access_key = resolve_optional_runtime_env(access_key).map_err( + |e| { + format!( + "Failed to resolve runtime environment variable for table '{}' field 'awsAccessKeyId': {}", + table.name, e + ) + }, + )?; + + let resolved_secret_key = resolve_optional_runtime_env(secret_key).map_err( + |e| { + format!( + "Failed to resolve runtime environment variable for table '{}' field 'awsSecretAccessKey': {}", + table.name, e + ) + }, + )?; + + *access_key = resolved_access_key; + *secret_key = resolved_secret_key; + + tracing::debug!( + "Resolved {} credentials for table '{}' at runtime", + engine_name, + table.name + ); + + Ok(()) + }; + + match &mut table.engine { + ClickhouseEngine::S3Queue { + aws_access_key_id, + aws_secret_access_key, + .. + } => { + resolve_aws_credentials(aws_access_key_id, aws_secret_access_key, "S3Queue")?; + should_recalc_hash = true; + } + ClickhouseEngine::S3 { + aws_access_key_id, + aws_secret_access_key, + .. + } => { + resolve_aws_credentials(aws_access_key_id, aws_secret_access_key, "S3")?; + should_recalc_hash = true; + } + ClickhouseEngine::IcebergS3 { + aws_access_key_id, + aws_secret_access_key, + .. 
+ } => { + resolve_aws_credentials(aws_access_key_id, aws_secret_access_key, "IcebergS3")?; + should_recalc_hash = true; + } + _ => { + // No credentials to resolve for other engine types } } // Recalculate engine_params_hash after resolving credentials if should_recalc_hash { - table.engine_params_hash = - table.engine.as_ref().map(|e| e.non_alterable_params_hash()); - log::debug!( + table.engine_params_hash = Some(table.engine.non_alterable_params_hash()); + tracing::debug!( "Recalculated engine_params_hash for table '{}' after credential resolution", table.name ); @@ -2372,7 +2324,7 @@ impl InfrastructureMap { pub async fn load_from_last_redis_prefix(redis_client: &RedisClient) -> Result> { let last_prefix = &redis_client.config.last_key_prefix; - log::info!( + tracing::info!( "Loading InfrastructureMap from last Redis prefix: {}", last_prefix ); @@ -2383,7 +2335,7 @@ impl InfrastructureMap { .context("Failed to get InfrastructureMap from Redis using LAST_KEY_PREFIX"); if let Err(e) = encoded { - log::error!("{}", e); + tracing::error!("{}", e); return Ok(None); } @@ -2559,6 +2511,46 @@ impl InfrastructureMap { self.tables.values().find(|table| table.name == name) } + /// Normalizes the infrastructure map for backward compatibility + /// + /// This applies the same normalization logic as partial_infrastructure_map.rs + /// to ensure consistent comparison between old and new infrastructure maps. + /// + /// Specifically: + /// - Falls back to primary key columns for order_by when it's empty (for MergeTree tables) + /// - Ensures arrays are always required=true (ClickHouse doesn't support Nullable(Array)) + /// + /// This is needed because older CLI versions didn't persist order_by when it was + /// derived from primary key columns. + pub fn normalize(mut self) -> Self { + use crate::framework::core::infrastructure::table::ColumnType; + + self.tables = self + .tables + .into_iter() + .map(|(id, mut table)| { + // Fall back to primary key columns if order_by is empty for MergeTree engines + // This ensures backward compatibility when order_by isn't explicitly set + // We only do this for MergeTree family to avoid breaking S3 tables + if table.order_by.is_empty() { + table.order_by = table.order_by_with_fallback(); + } + + // Normalize columns: ClickHouse doesn't support Nullable(Array(...)) + // Arrays must always be NOT NULL (required=true) + for col in &mut table.columns { + if matches!(col.data_type, ColumnType::Array { .. 
}) { + col.required = true; + } + } + + (id, table) + }) + .collect(); + + self + } + /// Adds a topic to the infrastructure map /// /// # Arguments @@ -2881,7 +2873,7 @@ pub fn compute_table_columns_diff(before: &Table, after: &Table) -> Vec Vec Vec(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + // We need to temporarily derive Serialize on a shadow type to avoid infinite recursion + // Create a JSON value using the derived Serialize, then sort keys + #[derive(serde::Serialize)] + struct InfrastructureMapForSerialization<'a> { + #[serde(skip_serializing_if = "Option::is_none")] + default_database: Option<&'a String>, + topics: &'a HashMap, + api_endpoints: &'a HashMap, + tables: &'a HashMap, + views: &'a HashMap, + topic_to_table_sync_processes: &'a HashMap, + topic_to_topic_sync_processes: &'a HashMap, + function_processes: &'a HashMap, + block_db_processes: &'a OlapProcess, + consumption_api_web_server: &'a ConsumptionApiWebServer, + orchestration_workers: &'a HashMap, + sql_resources: &'a HashMap, + workflows: &'a HashMap, + web_apps: &'a HashMap, + } + + let shadow_map = InfrastructureMapForSerialization { + default_database: Some(&self.default_database), + topics: &self.topics, + api_endpoints: &self.api_endpoints, + tables: &self.tables, + views: &self.views, + topic_to_table_sync_processes: &self.topic_to_table_sync_processes, + topic_to_topic_sync_processes: &self.topic_to_topic_sync_processes, + function_processes: &self.function_processes, + block_db_processes: &self.block_db_processes, + consumption_api_web_server: &self.consumption_api_web_server, + orchestration_workers: &self.orchestration_workers, + sql_resources: &self.sql_resources, + workflows: &self.workflows, + web_apps: &self.web_apps, + }; + + // Serialize to JSON value, sort keys, then serialize that + let json_value = serde_json::to_value(&shadow_map).map_err(serde::ser::Error::custom)?; + let sorted_value = crate::utilities::json::sort_json_keys(json_value); + sorted_value.serialize(serializer) + } +} + #[cfg(test)] mod tests { use crate::framework::core::infrastructure::table::IntType; @@ -2961,12 +3007,13 @@ mod tests { }; use crate::framework::versions::Version; use crate::infrastructure::olap::clickhouse::config::DEFAULT_DATABASE_NAME; + use crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine; #[test] fn test_compute_table_diff() { let before = Table { name: "test_table".to_string(), - engine: None, + engine: ClickhouseEngine::MergeTree, columns: vec![ Column { name: "id".to_string(), @@ -3017,11 +3064,13 @@ mod tests { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, + primary_key_expression: None, }; let after = Table { name: "test_table".to_string(), - engine: None, + engine: ClickhouseEngine::MergeTree, columns: vec![ Column { name: "id".to_string(), @@ -3072,6 +3121,8 @@ mod tests { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, + primary_key_expression: None, }; let diff = compute_table_columns_diff(&before, &after); @@ -3230,7 +3281,7 @@ mod diff_tests { pub fn create_test_table(name: &str, version: &str) -> Table { Table { name: name.to_string(), - engine: None, + engine: ClickhouseEngine::MergeTree, columns: vec![], order_by: OrderBy::Fields(vec![]), partition_by: None, @@ -3247,6 +3298,8 @@ mod diff_tests { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, + primary_key_expression: None, } } @@ -3528,11 +3581,11 @@ mod diff_tests { let mut before = 
create_test_table("test", "1.0"); let mut after = create_test_table("test", "1.0"); - before.engine = Some(ClickhouseEngine::MergeTree); - after.engine = Some(ClickhouseEngine::ReplacingMergeTree { + before.engine = ClickhouseEngine::MergeTree; + after.engine = ClickhouseEngine::ReplacingMergeTree { ver: None, is_deleted: None, - }); + }; // Set database field for both tables before.database = Some(DEFAULT_DATABASE_NAME.to_string()); @@ -3557,10 +3610,10 @@ mod diff_tests { after: a, .. }) => { - assert_eq!(b.engine.as_ref(), Some(&ClickhouseEngine::MergeTree)); + assert!(matches!(&b.engine, ClickhouseEngine::MergeTree)); assert!(matches!( - a.engine.as_ref(), - Some(ClickhouseEngine::ReplacingMergeTree { .. }) + &a.engine, + ClickhouseEngine::ReplacingMergeTree { .. } )); } _ => panic!("Expected Updated change with engine modification"), @@ -4497,6 +4550,7 @@ mod diff_sql_resources_tests { fn create_sql_resource(name: &str, setup: Vec<&str>, teardown: Vec<&str>) -> SqlResource { SqlResource { name: name.to_string(), + database: None, setup: setup.iter().map(|s| s.to_string()).collect(), teardown: teardown.iter().map(|s| s.to_string()).collect(), pulls_data_from: vec![], @@ -4732,6 +4786,7 @@ mod diff_sql_resources_tests { let mv_before = SqlResource { name: "events_summary_mv".to_string(), + database: None, setup: vec!["CREATE MATERIALIZED VIEW events_summary_mv TO events_summary_table AS SELECT id, name FROM events".to_string()], teardown: vec!["DROP VIEW events_summary_mv".to_string()], pulls_data_from: vec![InfrastructureSignature::Table { @@ -4744,6 +4799,7 @@ mod diff_sql_resources_tests { let mv_after = SqlResource { name: "events_summary_mv".to_string(), + database: None, setup: vec!["CREATE MATERIALIZED VIEW events_summary_mv TO events_summary_table AS SELECT id, name, timestamp FROM events".to_string()], teardown: vec!["DROP VIEW events_summary_mv".to_string()], pulls_data_from: vec![InfrastructureSignature::Table { diff --git a/apps/framework-cli/src/framework/core/migration_plan.rs b/apps/framework-cli/src/framework/core/migration_plan.rs index 2da1aaab94..2df5ecac10 100644 --- a/apps/framework-cli/src/framework/core/migration_plan.rs +++ b/apps/framework-cli/src/framework/core/migration_plan.rs @@ -1,11 +1,15 @@ use crate::framework::core::infrastructure_map::{InfraChanges, InfrastructureMap}; use crate::infrastructure::olap::clickhouse::SerializableOlapOperation; use crate::infrastructure::olap::ddl_ordering::{order_olap_changes, PlanOrderingError}; +use crate::utilities::json; use chrono::{DateTime, Utc}; -use serde::{Deserialize, Serialize}; +use serde::Deserialize; /// A comprehensive migration plan that can be reviewed, approved, and executed -#[derive(Debug, Clone, Serialize, Deserialize)] +/// +/// Note: This type has a custom `Serialize` implementation that sorts all JSON keys +/// alphabetically for deterministic output in version-controlled migration files. 
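For context on the sorted-keys serialization this doc comment describes: the value is funneled through a key-sorting utility before it is written out as YAML. A minimal standalone sketch of such a recursive sorter over `serde_json::Value` (an illustration of the shape, not the crate's exact `utilities::json::sort_json_keys`):

```rust
use serde_json::{Map, Value};

// Recursively rebuild every JSON object with its keys in alphabetical order so
// that serializing HashMap-backed structures produces stable, diff-friendly output.
fn sort_json_keys(value: Value) -> Value {
    match value {
        Value::Object(map) => {
            let mut entries: Vec<(String, Value)> = map.into_iter().collect();
            entries.sort_by(|a, b| a.0.cmp(&b.0));
            let mut sorted = Map::new();
            for (key, inner) in entries {
                sorted.insert(key, sort_json_keys(inner));
            }
            Value::Object(sorted)
        }
        Value::Array(items) => Value::Array(items.into_iter().map(sort_json_keys).collect()),
        other => other,
    }
}
```

A custom `Serialize` impl can then convert a shadow struct to a `Value`, pass it through the sorter, and serialize the sorted value, which is the pattern the hunks below apply to `MigrationPlan`.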
+#[derive(Debug, Clone, Deserialize)] pub struct MigrationPlan { /// Timestamp when this plan was generated pub created_at: DateTime, @@ -51,13 +55,42 @@ impl MigrationPlan { } pub fn to_yaml(&self) -> anyhow::Result { - let plan_json = serde_json::to_value(self)?; // going through JSON before YAML because tooling does not support `!tag` + // Sorted keys are handled by the custom Serialize implementation + let plan_json = serde_json::to_value(self)?; let plan_yaml = serde_yaml::to_string(&plan_json)?; Ok(plan_yaml) } } +impl serde::Serialize for MigrationPlan { + /// Custom serialization with sorted keys for deterministic output. + /// + /// Migration files are version-controlled, so we need consistent output. + /// Without sorted keys, HashMap serialization order is random, causing noisy diffs. + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + // Shadow type to avoid infinite recursion + #[derive(serde::Serialize)] + struct MigrationPlanForSerialization<'a> { + created_at: &'a DateTime, + operations: &'a Vec, + } + + let shadow = MigrationPlanForSerialization { + created_at: &self.created_at, + operations: &self.operations, + }; + + // Serialize to JSON value, sort keys, then serialize that + let json_value = serde_json::to_value(&shadow).map_err(serde::ser::Error::custom)?; + let sorted_value = json::sort_json_keys(json_value); + sorted_value.serialize(serializer) + } +} + pub struct MigrationPlanWithBeforeAfter { pub remote_state: InfrastructureMap, pub local_infra_map: InfrastructureMap, diff --git a/apps/framework-cli/src/framework/core/partial_infrastructure_map.rs b/apps/framework-cli/src/framework/core/partial_infrastructure_map.rs index 2ec987f84c..1bc781c1c0 100644 --- a/apps/framework-cli/src/framework/core/partial_infrastructure_map.rs +++ b/apps/framework-cli/src/framework/core/partial_infrastructure_map.rs @@ -39,9 +39,9 @@ use std::{ path::{Path, PathBuf}, }; -use log::debug; use serde::{Deserialize, Serialize}; use tokio::process::Child; +use tracing::debug; use super::{ infrastructure::{ @@ -51,7 +51,7 @@ use super::{ olap_process::OlapProcess, orchestration_worker::OrchestrationWorker, sql_resource::SqlResource, - table::{Column, Metadata, Table, TableIndex}, + table::{Column, ColumnType, Metadata, Table, TableIndex}, topic::{KafkaSchema, Topic, DEFAULT_MAX_MESSAGE_BYTES}, topic_sync_process::{TopicToTableSyncProcess, TopicToTopicSyncProcess}, view::View, @@ -59,6 +59,7 @@ use super::{ infrastructure_map::{InfrastructureMap, PrimitiveSignature, PrimitiveTypes}, }; use crate::framework::core::infrastructure::table::OrderBy; +use crate::infrastructure::olap::clickhouse::queries::BufferEngine; use crate::{ framework::{ consumption::model::ConsumptionQueryParam, languages::SupportedLanguages, @@ -155,6 +156,16 @@ struct DistributedConfig { policy_name: Option, } +#[derive(Debug, Deserialize)] +#[serde(rename_all = "camelCase")] +struct IcebergS3Config { + path: String, + format: String, + aws_access_key_id: Option, + aws_secret_access_key: Option, + compression: Option, +} + #[derive(Debug, Deserialize)] #[serde(tag = "engine", rename_all = "camelCase")] enum EngineConfig { @@ -227,6 +238,9 @@ enum EngineConfig { #[serde(rename = "Distributed")] Distributed(Box), + + #[serde(rename = "IcebergS3")] + IcebergS3(Box), } #[derive(Debug, Deserialize)] @@ -256,6 +270,12 @@ struct PartialTable { /// Optional database name for multi-database support #[serde(default)] pub database: Option, + /// Optional cluster name for ON CLUSTER support + 
#[serde(default)] + pub cluster: Option, + /// Optional PRIMARY KEY expression (overrides column-level primary_key flags when specified) + #[serde(default, alias = "primary_key_expression")] + pub primary_key_expression: Option, } /// Represents a topic definition from user code before it's converted into a complete [`Topic`]. @@ -523,7 +543,7 @@ impl PartialInfrastructureMap { .split("end___MOOSE_STUFF___") .next() .ok_or_else(output_format)?; - log::info!("load_from_user_code inframap json: {}", json); + tracing::info!("load_from_user_code inframap json: {}", json); Ok(serde_json::from_str(json) .inspect_err(|_| debug!("Invalid JSON from exports: {}", raw_string_stdout))?) @@ -614,7 +634,7 @@ impl PartialInfrastructureMap { .map(|v_str| Version::from_string(v_str.clone())); let engine = self.parse_engine(partial_table)?; - let engine_params_hash = engine.as_ref().map(|e| e.non_alterable_params_hash()); + let engine_params_hash = Some(engine.non_alterable_params_hash()); // S3Queue settings should come directly from table_settings in the user code let mut table_settings = partial_table.table_settings.clone().unwrap_or_default(); @@ -622,11 +642,7 @@ impl PartialInfrastructureMap { // Apply ClickHouse default settings for MergeTree family engines // This ensures our internal representation matches what ClickHouse actually has // and prevents unnecessary diffs - // Note: When engine is None, ClickHouse defaults to MergeTree, so we apply defaults in that case too - let should_apply_mergetree_defaults = match &engine { - None => true, // No engine specified defaults to MergeTree - Some(eng) => eng.is_merge_tree_family(), - }; + let should_apply_mergetree_defaults = engine.is_merge_tree_family(); if should_apply_mergetree_defaults { // Apply MergeTree defaults if not explicitly set by user @@ -653,25 +669,55 @@ impl PartialInfrastructureMap { } // Buffer, S3Queue, Distributed, and other non-MergeTree engines don't support PRIMARY KEY - // When engine is None, ClickHouse defaults to MergeTree which does support it - let supports_primary_key = engine.as_ref().is_none_or(|e| e.supports_order_by()); - // Clear primary_key flag from columns if engine doesn't support it - let columns = if supports_primary_key { - partial_table.columns.clone() - } else { - partial_table - .columns - .iter() - .map(|col| Column { - primary_key: false, - ..col.clone() - }) - .collect() - }; + let supports_primary_key = engine.supports_order_by(); + + // Normalize columns: + // 1. Clear primary_key flag if engine doesn't support it + // 2. Force arrays to be required=true (ClickHouse doesn't support nullable arrays) + let columns: Vec = partial_table + .columns + .iter() + .map(|col| { + let mut normalized_col = col.clone(); + + // Clear primary_key if engine doesn't support it + if !supports_primary_key { + normalized_col.primary_key = false; + } + + // ClickHouse doesn't support Nullable(Array(...)) + // Arrays must always be NOT NULL (required=true) + if matches!(col.data_type, ColumnType::Array { .. 
}) { + normalized_col.required = true; + } + + normalized_col + }) + .collect(); // Extract table-level TTL from partial table let table_ttl_setting = partial_table.ttl.clone(); + // Fall back to primary key columns if order_by is empty for MergeTree engines + // This ensures backward compatibility when order_by isn't explicitly set + // We only do this for MergeTree family to avoid breaking S3 tables + let order_by = if partial_table.order_by.is_empty() && engine.is_merge_tree_family() + { + let primary_key_columns: Vec = columns + .iter() + .filter_map(|c| { + if c.primary_key { + Some(c.name.clone()) + } else { + None + } + }) + .collect(); + OrderBy::Fields(primary_key_columns) + } else { + partial_table.order_by.clone() + }; + let table = Table { name: version .as_ref() @@ -679,7 +725,7 @@ impl PartialInfrastructureMap { format!("{}_{}", partial_table.name, version.as_suffix()) }), columns, - order_by: partial_table.order_by.clone(), + order_by, partition_by: partial_table.partition_by.clone(), sample_by: partial_table.sample_by.clone(), engine, @@ -699,6 +745,8 @@ impl PartialInfrastructureMap { indexes: partial_table.indexes.clone(), table_ttl_setting, database: partial_table.database.clone(), + cluster_name: partial_table.cluster.clone(), + primary_key_expression: partial_table.primary_key_expression.clone(), }; Ok((table.id(default_database), table)) }) @@ -714,81 +762,81 @@ impl PartialInfrastructureMap { fn parse_engine( &self, partial_table: &PartialTable, - ) -> Result, DmV2LoadingError> { + ) -> Result { match &partial_table.engine_config { - Some(EngineConfig::MergeTree {}) => Ok(Some(ClickhouseEngine::MergeTree)), + Some(EngineConfig::MergeTree {}) => Ok(ClickhouseEngine::MergeTree), Some(EngineConfig::ReplacingMergeTree { ver, is_deleted }) => { - Ok(Some(ClickhouseEngine::ReplacingMergeTree { + Ok(ClickhouseEngine::ReplacingMergeTree { ver: ver.clone(), is_deleted: is_deleted.clone(), - })) + }) } Some(EngineConfig::AggregatingMergeTree {}) => { - Ok(Some(ClickhouseEngine::AggregatingMergeTree)) + Ok(ClickhouseEngine::AggregatingMergeTree) } Some(EngineConfig::SummingMergeTree { columns }) => { - Ok(Some(ClickhouseEngine::SummingMergeTree { + Ok(ClickhouseEngine::SummingMergeTree { columns: columns.clone(), - })) + }) } Some(EngineConfig::ReplicatedMergeTree { keeper_path, replica_name, - }) => Ok(Some(ClickhouseEngine::ReplicatedMergeTree { + }) => Ok(ClickhouseEngine::ReplicatedMergeTree { keeper_path: keeper_path.clone(), replica_name: replica_name.clone(), - })), + }), Some(EngineConfig::ReplicatedReplacingMergeTree { keeper_path, replica_name, ver, is_deleted, - }) => Ok(Some(ClickhouseEngine::ReplicatedReplacingMergeTree { + }) => Ok(ClickhouseEngine::ReplicatedReplacingMergeTree { keeper_path: keeper_path.clone(), replica_name: replica_name.clone(), ver: ver.clone(), is_deleted: is_deleted.clone(), - })), + }), Some(EngineConfig::ReplicatedAggregatingMergeTree { keeper_path, replica_name, - }) => Ok(Some(ClickhouseEngine::ReplicatedAggregatingMergeTree { + }) => Ok(ClickhouseEngine::ReplicatedAggregatingMergeTree { keeper_path: keeper_path.clone(), replica_name: replica_name.clone(), - })), + }), Some(EngineConfig::ReplicatedSummingMergeTree { keeper_path, replica_name, columns, - }) => Ok(Some(ClickhouseEngine::ReplicatedSummingMergeTree { + }) => Ok(ClickhouseEngine::ReplicatedSummingMergeTree { keeper_path: keeper_path.clone(), replica_name: replica_name.clone(), columns: columns.clone(), - })), + }), Some(EngineConfig::S3Queue(config)) => { // Keep environment 
variable markers as-is - credentials will be resolved at runtime // S3Queue settings are handled in table_settings, not in the engine - Ok(Some(ClickhouseEngine::S3Queue { + Ok(ClickhouseEngine::S3Queue { s3_path: config.s3_path.clone(), format: config.format.clone(), compression: config.compression.clone(), headers: config.headers.clone(), aws_access_key_id: config.aws_access_key_id.clone(), aws_secret_access_key: config.aws_secret_access_key.clone(), - })) + }) } Some(EngineConfig::S3(config)) => { // Keep environment variable markers as-is - credentials will be resolved at runtime - Ok(Some(ClickhouseEngine::S3 { + Ok(ClickhouseEngine::S3 { path: config.path.clone(), format: config.format.clone(), aws_access_key_id: config.aws_access_key_id.clone(), @@ -796,10 +844,10 @@ impl PartialInfrastructureMap { compression: config.compression.clone(), partition_strategy: config.partition_strategy.clone(), partition_columns_in_data_file: config.partition_columns_in_data_file.clone(), - })) + }) } - Some(EngineConfig::Buffer(config)) => Ok(Some(ClickhouseEngine::Buffer { + Some(EngineConfig::Buffer(config)) => Ok(ClickhouseEngine::Buffer(BufferEngine { target_database: config.target_database.clone(), target_table: config.target_table.clone(), num_layers: config.num_layers, @@ -814,15 +862,26 @@ impl PartialInfrastructureMap { flush_bytes: config.flush_bytes, })), - Some(EngineConfig::Distributed(config)) => Ok(Some(ClickhouseEngine::Distributed { + Some(EngineConfig::Distributed(config)) => Ok(ClickhouseEngine::Distributed { cluster: config.cluster.clone(), target_database: config.target_database.clone(), target_table: config.target_table.clone(), sharding_key: config.sharding_key.clone(), policy_name: config.policy_name.clone(), - })), + }), + + Some(EngineConfig::IcebergS3(config)) => { + // Keep environment variable markers as-is - credentials will be resolved at runtime + Ok(ClickhouseEngine::IcebergS3 { + path: config.path.clone(), + format: config.format.clone(), + aws_access_key_id: config.aws_access_key_id.clone(), + aws_secret_access_key: config.aws_secret_access_key.clone(), + compression: config.compression.clone(), + }) + } - None => Ok(None), + None => Ok(ClickhouseEngine::MergeTree), } } @@ -1076,9 +1135,9 @@ impl PartialInfrastructureMap { TopicToTableSyncProcess::new(source_topic, target_table, default_database); let sync_id = sync_process.id(); sync_processes.insert(sync_id.clone(), sync_process); - log::info!(" Created topic_to_table_sync_processes {}", sync_id); + tracing::info!(" Created topic_to_table_sync_processes {}", sync_id); } else { - log::info!( + tracing::info!( " Topic {} has no target_table specified, skipping sync process creation", partial_topic.name ); diff --git a/apps/framework-cli/src/framework/core/plan.rs b/apps/framework-cli/src/framework/core/plan.rs index ee87431c67..a3f38e3c5b 100644 --- a/apps/framework-cli/src/framework/core/plan.rs +++ b/apps/framework-cli/src/framework/core/plan.rs @@ -14,7 +14,7 @@ /// The resulting plan is then used by the execution module to apply the changes. 
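The `parse_engine` hunks above drop the `Option` wrapper, default to MergeTree when user code specifies no engine, and add the new `IcebergS3` variant. A compilable sketch of that shape with simplified stand-in types (assuming `serde`/`serde_json`; the variants and fields shown are a subset for illustration, not the crate's full config surface):

```rust
use serde::Deserialize;

// Internally tagged config, mirroring the `#[serde(tag = "engine")]` pattern above.
#[derive(Debug, Deserialize)]
#[serde(tag = "engine")]
enum EngineConfig {
    MergeTree {},
    IcebergS3 { path: String, format: String },
}

#[derive(Debug, PartialEq)]
enum Engine {
    MergeTree,
    IcebergS3 { path: String, format: String },
}

fn parse_engine(config: Option<EngineConfig>) -> Engine {
    match config {
        Some(EngineConfig::MergeTree {}) => Engine::MergeTree,
        Some(EngineConfig::IcebergS3 { path, format }) => Engine::IcebergS3 { path, format },
        // No engine block in user code: ClickHouse's default table engine is MergeTree.
        None => Engine::MergeTree,
    }
}

fn main() {
    let raw = r#"{ "engine": "IcebergS3", "path": "s3://bucket/warehouse", "format": "Parquet" }"#;
    let cfg: EngineConfig = serde_json::from_str(raw).expect("valid engine config");
    assert!(matches!(parse_engine(Some(cfg)), Engine::IcebergS3 { .. }));
    assert_eq!(parse_engine(None), Engine::MergeTree);
}
```

Returning a concrete engine instead of `Option<ClickhouseEngine>` removes the "no engine means MergeTree, but only sometimes" special-casing that previously had to be repeated at every call site.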
use crate::framework::core::infra_reality_checker::{InfraRealityChecker, RealityCheckError}; use crate::framework::core::infrastructure_map::{ - InfraChanges, InfrastructureMap, OlapChange, TableChange, + Change, InfraChanges, InfrastructureMap, OlapChange, TableChange, }; use crate::framework::core::primitive_map::PrimitiveMap; use crate::framework::core::state_storage::StateStorage; @@ -24,12 +24,12 @@ use crate::infrastructure::olap::clickhouse::config::DEFAULT_DATABASE_NAME; use crate::infrastructure::olap::clickhouse::diff_strategy::ClickHouseTableDiffStrategy; use crate::infrastructure::olap::OlapOperations; use crate::project::Project; -use log::{debug, error, info}; use rdkafka::error::KafkaError; use serde::{Deserialize, Serialize}; use std::collections::HashSet; use std::mem; use std::path::Path; +use tracing::{debug, error, info}; /// Errors that can occur during the planning process. #[derive(Debug, thiserror::Error)] @@ -38,10 +38,6 @@ pub enum PlanningError { #[error("Failed to load primitive map")] PrimitiveMapLoading(#[from] crate::framework::core::primitive_map::PrimitiveMapLoadingError), - /// Error occurred while connecting to the Clickhouse database - #[error("Failed to connect to state storage")] - Clickhouse(#[from] clickhouse_rs::errors::Error), - /// Error occurred while connecting to Kafka #[error("Failed to connect to streaming engine")] Kafka(#[from] KafkaError), @@ -76,7 +72,8 @@ pub enum PlanningError { /// # Arguments /// * `project` - The project configuration /// * `infra_map` - The infrastructure map to update -/// * `target_table_names` - Names of tables to include from unmapped tables (tables in DB but not in current inframap). Only unmapped tables with names in this set will be added to the reconciled inframap. +/// * `target_table_ids` - Tables to include from unmapped tables (tables in DB but not in current inframap). Only unmapped tables with names in this set will be added to the reconciled inframap. +/// * `target_sql_resource_ids` - SQL resources to include from unmapped SQL resources. /// * `olap_client` - The OLAP client to use for checking reality /// /// # Returns @@ -85,6 +82,7 @@ pub async fn reconcile_with_reality( project: &Project, current_infra_map: &InfrastructureMap, target_table_ids: &HashSet, + target_sql_resource_ids: &HashSet, olap_client: T, ) -> Result { info!("Reconciling infrastructure map with actual database state"); @@ -172,6 +170,12 @@ pub async fn reconcile_with_reality( // that might have authentication parameters. table.engine_params_hash = infra_map_table.engine_params_hash.clone(); + // Keep the cluster_name from the infra map because it cannot be reliably detected + // from ClickHouse's system tables. The ON CLUSTER clause is only used during + // DDL execution and is not stored in the table schema. While it appears in + // system.distributed_ddl_queue, those entries are ephemeral and get cleaned up. 
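To make the reconciliation step around this point concrete: the reconciled entry takes its schema from what ClickHouse reports, but copies fields ClickHouse cannot report back (the ON CLUSTER name, the engine-params hash that may reflect resolved credentials) from the existing infrastructure map. A hedged, simplified sketch with stand-in types, not the crate's `Table`:

```rust
// Simplified stand-in for the crate's Table; only the fields relevant to this step.
#[derive(Clone)]
struct TableState {
    columns: Vec<String>,
    cluster_name: Option<String>,        // deployment directive, not stored by ClickHouse
    engine_params_hash: Option<String>,  // may reflect credentials resolved at runtime
}

// Schema comes from the database; deployment-only metadata comes from the
// existing infrastructure map, because ClickHouse does not persist it.
fn reconcile_table(reality: &TableState, infra_map: &TableState) -> TableState {
    TableState {
        columns: reality.columns.clone(),
        cluster_name: infra_map.cluster_name.clone(),
        engine_params_hash: infra_map.engine_params_hash.clone(),
    }
}
```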
+ table.cluster_name = infra_map_table.cluster_name.clone(); + reconciled_map .tables .insert(reality_table.id(&reconciled_map.default_database), table); @@ -252,6 +256,60 @@ pub async fn reconcile_with_reality( } } + // Handle SQL resources reconciliation + debug!("Reconciling SQL resources (views and materialized views)"); + + // Remove missing SQL resources (in map but don't exist in reality) + for missing_sql_resource_id in discrepancies.missing_sql_resources { + debug!( + "Removing missing SQL resource from infrastructure map: {}", + missing_sql_resource_id + ); + reconciled_map + .sql_resources + .remove(&missing_sql_resource_id); + } + + // Add unmapped SQL resources (exist in database but not in current infrastructure map) + // Only include resources whose names are in target_sql_resource_ids to avoid managing external resources + for unmapped_sql_resource in discrepancies.unmapped_sql_resources { + let name = &unmapped_sql_resource.name; + + if target_sql_resource_ids.contains(name) { + debug!( + "Adding unmapped SQL resource found in reality to infrastructure map: {}", + name + ); + reconciled_map + .sql_resources + .insert(name.clone(), unmapped_sql_resource); + } + } + + // Update mismatched SQL resources (exist in both but differ) + for change in discrepancies.mismatched_sql_resources { + match change { + OlapChange::SqlResource(Change::Updated { before, .. }) => { + // We use 'before' (the actual resource from reality) because we want the + // reconciled map to reflect the current state of the database. + // This ensures the subsequent diff against the target map will correctly + // identify that the current state differs from the desired state. + let name = &before.name; + debug!( + "Updating mismatched SQL resource in infrastructure map to match reality: {}", + name + ); + reconciled_map.sql_resources.insert(name.clone(), *before); + } + _ => { + tracing::warn!( + "Unexpected change type in mismatched_sql_resources: {:?}", + change + ); + } + } + } + info!("Infrastructure map successfully reconciled with actual database state"); Ok(reconciled_map) } @@ -336,6 +394,7 @@ pub async fn plan_changes( .values() .map(|t| t.id(&target_infra_map.default_database)) .collect(), + &target_infra_map.sql_resources.keys().cloned().collect(), olap_client, ) .await? @@ -397,12 +456,14 @@ pub async fn plan_changes( #[cfg(test)] mod tests { use super::*; + use crate::framework::core::infrastructure::sql_resource::SqlResource; use crate::framework::core::infrastructure::table::{ Column, ColumnType, IntType, OrderBy, Table, }; use crate::framework::core::infrastructure_map::{PrimitiveSignature, PrimitiveTypes}; use crate::framework::core::partial_infrastructure_map::LifeCycle; use crate::framework::versions::Version; + use crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine; use crate::infrastructure::olap::clickhouse::TableWithUnsupportedType; use crate::infrastructure::olap::OlapChangesError; use crate::infrastructure::olap::OlapOperations; @@ -412,6 +473,7 @@ mod tests { // Mock OLAP client for testing struct MockOlapClient { tables: Vec
, + sql_resources: Vec, } #[async_trait] @@ -423,6 +485,14 @@ mod tests { ) -> Result<(Vec
, Vec), OlapChangesError> { Ok((self.tables.clone(), vec![])) } + + async fn list_sql_resources( + &self, + _db_name: &str, + _default_database: &str, + ) -> Result, OlapChangesError> { + Ok(self.sql_resources.clone()) + } } // Helper function to create a test table @@ -443,7 +513,7 @@ mod tests { order_by: OrderBy::Fields(vec!["id".to_string()]), partition_by: None, sample_by: None, - engine: None, + engine: ClickhouseEngine::MergeTree, version: Some(Version::from_string("1.0.0".to_string())), source_primitive: PrimitiveSignature { name: "test".to_string(), @@ -456,6 +526,8 @@ mod tests { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, + primary_key_expression: None, } } @@ -506,6 +578,7 @@ mod tests { // Create mock OLAP client with one table let mock_client = MockOlapClient { tables: vec![table.clone()], + sql_resources: vec![], }; // Create empty infrastructure map (no tables) @@ -529,36 +602,42 @@ mod tests { let mut target_ids = HashSet::new(); - // Reconcile the infrastructure map + // Test 1: Empty target_ids = no managed tables, so unmapped tables are filtered out + // External tables are not accidentally included let reconciled = reconcile_with_reality( &project, &infra_map, - &target_ids, + &HashSet::new(), + &HashSet::new(), MockOlapClient { tables: vec![table.clone()], + sql_resources: vec![], }, ) .await .unwrap(); - // The reconciled map should not contain the unmapped table (ignoring unmapped tables) + // With empty target_ids, the unmapped table should NOT be added (external table) assert_eq!(reconciled.tables.len(), 0); target_ids.insert("test_unmapped_table_1_0_0".to_string()); - // Reconcile the infrastructure map + // Test 2: Non-empty target_ids = only include if in set + // This is the behavior used by `moose dev`, `moose prod`, etc. 
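The filter rule these reconcile tests exercise: an unmapped object discovered in the database is adopted into the reconciled map only when its ID is in the caller-supplied target set, so externally managed tables and views stay untouched. A minimal sketch of that rule with a hypothetical helper name (not the crate's API):

```rust
use std::collections::{HashMap, HashSet};

/// Adopt only those unmapped objects whose IDs the caller has declared as managed.
/// Anything else is treated as external and left untouched.
fn adopt_managed<T>(
    unmapped: Vec<(String, T)>,
    managed_ids: &HashSet<String>,
    reconciled: &mut HashMap<String, T>,
) {
    for (id, object) in unmapped {
        if managed_ids.contains(&id) {
            reconciled.insert(id, object);
        }
    }
}

fn main() {
    let mut reconciled: HashMap<String, &str> = HashMap::new();
    let managed: HashSet<String> = ["view_a".to_string()].into_iter().collect();
    adopt_managed(
        vec![("view_a".into(), "managed"), ("view_b".into(), "external")],
        &managed,
        &mut reconciled,
    );
    assert!(reconciled.contains_key("view_a"));
    assert!(!reconciled.contains_key("view_b"));
}
```

Passing an empty set therefore means "nothing is managed", which is exactly the behavior the first test below asserts.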
let reconciled = reconcile_with_reality( &project, &infra_map, &target_ids, + &HashSet::new(), MockOlapClient { tables: vec![table.clone()], + sql_resources: vec![], }, ) .await .unwrap(); - // The reconciled map should not contain the unmapped table (ignoring unmapped tables) + // When target_ids contains the table ID, it's included assert_eq!(reconciled.tables.len(), 1); } @@ -568,7 +647,10 @@ mod tests { let table = create_test_table("missing_table"); // Create mock OLAP client with no tables - let mock_client = MockOlapClient { tables: vec![] }; + let mock_client = MockOlapClient { + tables: vec![], + sql_resources: vec![], + }; // Create infrastructure map with one table let mut infra_map = InfrastructureMap::default(); @@ -593,7 +675,10 @@ mod tests { assert_eq!(discrepancies.missing_tables[0], "missing_table"); // Create another mock client for the reconciliation - let reconcile_mock_client = MockOlapClient { tables: vec![] }; + let reconcile_mock_client = MockOlapClient { + tables: vec![], + sql_resources: vec![], + }; let target_table_ids = HashSet::new(); @@ -602,6 +687,7 @@ mod tests { &project, &infra_map, &target_table_ids, + &HashSet::new(), reconcile_mock_client, ) .await @@ -640,6 +726,7 @@ mod tests { database: Some(db_name.clone()), ..actual_table.clone() }], + sql_resources: vec![], }; // Create infrastructure map with the infra table (no extra column) @@ -670,6 +757,7 @@ mod tests { database: Some(db_name.clone()), ..actual_table.clone() }], + sql_resources: vec![], }; let target_table_ids = HashSet::new(); @@ -678,6 +766,7 @@ mod tests { &project, &infra_map, &target_table_ids, + &HashSet::new(), reconcile_mock_client, ) .await @@ -701,6 +790,7 @@ mod tests { // Create mock OLAP client with the table let mock_client = MockOlapClient { tables: vec![table.clone()], + sql_resources: vec![], }; // Create infrastructure map with the same table @@ -727,6 +817,7 @@ mod tests { // Create another mock client for reconciliation let reconcile_mock_client = MockOlapClient { tables: vec![table.clone()], + sql_resources: vec![], }; let target_table_ids = HashSet::new(); @@ -735,6 +826,7 @@ mod tests { &project, &infra_map, &target_table_ids, + &HashSet::new(), reconcile_mock_client, ) .await @@ -786,13 +878,20 @@ mod tests { ); // Also verify that reconciliation preserves the database name - let mock_client = MockOlapClient { tables: vec![] }; + let mock_client = MockOlapClient { + tables: vec![], + sql_resources: vec![], + }; - let target_table_ids = HashSet::new(); - let reconciled = - reconcile_with_reality(&project, &loaded_map, &target_table_ids, mock_client) - .await - .unwrap(); + let reconciled = reconcile_with_reality( + &project, + &loaded_map, + &HashSet::new(), + &HashSet::new(), + mock_client, + ) + .await + .unwrap(); assert_eq!( reconciled.default_database, CUSTOM_DB_NAME, @@ -839,13 +938,20 @@ mod tests { ); // Now test reconciliation - this is where the fix should be applied - let mock_client = MockOlapClient { tables: vec![] }; + let mock_client = MockOlapClient { + tables: vec![], + sql_resources: vec![], + }; - let target_table_ids = HashSet::new(); - let reconciled = - reconcile_with_reality(&project, &loaded_map, &target_table_ids, mock_client) - .await - .unwrap(); + let reconciled = reconcile_with_reality( + &project, + &loaded_map, + &HashSet::new(), + &HashSet::new(), + mock_client, + ) + .await + .unwrap(); // After reconciliation, the database name should be set from the project config assert_eq!( @@ -917,4 +1023,255 @@ mod tests { // but they don't 
directly use clickhouse_config.db_name. // The bug in ENG-1160 is specifically about default_database being hardcoded to "local". } + + #[tokio::test] + async fn test_reconcile_preserves_cluster_name() { + // Create a test table with a cluster name + let mut table = create_test_table("clustered_table"); + table.cluster_name = Some("test_cluster".to_string()); + + // Create mock OLAP client with the table (but cluster_name will be lost in reality) + let mut table_from_reality = table.clone(); + table_from_reality.cluster_name = None; // ClickHouse system.tables doesn't preserve this + + let mock_client = MockOlapClient { + tables: vec![table_from_reality], + sql_resources: vec![], + }; + + // Create infrastructure map with the table including cluster_name + let mut infra_map = InfrastructureMap::default(); + infra_map + .tables + .insert(table.id(DEFAULT_DATABASE_NAME), table.clone()); + + // Create test project + let project = create_test_project(); + + // Reconcile the infrastructure map + let reconciled = reconcile_with_reality( + &project, + &infra_map, + &HashSet::new(), + &HashSet::new(), + mock_client, + ) + .await + .unwrap(); + + // The reconciled map should preserve cluster_name from the infra map + assert_eq!(reconciled.tables.len(), 1); + let reconciled_table = reconciled.tables.values().next().unwrap(); + assert_eq!( + reconciled_table.cluster_name, + Some("test_cluster".to_string()), + "cluster_name should be preserved from infra map" + ); + } + + #[tokio::test] + async fn test_reconcile_with_reality_mismatched_table_preserves_cluster() { + // Create a table that exists in both places but with different schemas + let mut infra_table = create_test_table("mismatched_table"); + infra_table.cluster_name = Some("production_cluster".to_string()); + + let mut reality_table = create_test_table("mismatched_table"); + // Reality table has no cluster_name (as ClickHouse doesn't preserve it) + reality_table.cluster_name = None; + // Add a column difference to make them mismatched + reality_table + .columns + .push(crate::framework::core::infrastructure::table::Column { + name: "extra_col".to_string(), + data_type: crate::framework::core::infrastructure::table::ColumnType::String, + required: true, + unique: false, + primary_key: false, + default: None, + annotations: vec![], + comment: None, + ttl: None, + }); + + // Create mock OLAP client with the reality table + let mock_client = MockOlapClient { + tables: vec![reality_table.clone()], + sql_resources: vec![], + }; + + // Create infrastructure map with the infra table + let mut infra_map = InfrastructureMap::default(); + infra_map + .tables + .insert(infra_table.id(DEFAULT_DATABASE_NAME), infra_table.clone()); + + // Create test project + let project = create_test_project(); + + // Reconcile the infrastructure map + let reconciled = reconcile_with_reality( + &project, + &infra_map, + &HashSet::new(), + &HashSet::new(), + mock_client, + ) + .await + .unwrap(); + + // The reconciled map should still have the table + assert_eq!(reconciled.tables.len(), 1); + let reconciled_table = reconciled.tables.values().next().unwrap(); + + // The cluster_name should be preserved from the infra map + assert_eq!( + reconciled_table.cluster_name, + Some("production_cluster".to_string()), + "cluster_name should be preserved from infra map even when schema differs" + ); + + // But the columns should be updated from reality + assert_eq!( + reconciled_table.columns.len(), + reality_table.columns.len(), + "columns should be updated from reality" + ); + } + + 
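These reconcile tests all drive a small mock OLAP client that now returns canned SQL resources as well as tables. A standalone sketch of that test-double pattern, assuming the `async-trait` crate; the trait and types here are hypothetical simplifications of the crate's `OlapOperations`:

```rust
use async_trait::async_trait;

// Hypothetical, simplified stand-ins for the crate's OLAP trait and error type.
#[derive(Debug)]
struct OlapError;

#[async_trait]
trait OlapOps {
    async fn list_tables(&self, db: &str) -> Result<Vec<String>, OlapError>;
    async fn list_sql_resources(&self, db: &str) -> Result<Vec<String>, OlapError>;
}

/// Test double that returns whatever canned state the test sets up,
/// instead of querying a live ClickHouse instance.
struct MockOlap {
    tables: Vec<String>,
    sql_resources: Vec<String>,
}

#[async_trait]
impl OlapOps for MockOlap {
    async fn list_tables(&self, _db: &str) -> Result<Vec<String>, OlapError> {
        Ok(self.tables.clone())
    }
    async fn list_sql_resources(&self, _db: &str) -> Result<Vec<String>, OlapError> {
        Ok(self.sql_resources.clone())
    }
}
```

Keeping the mock's canned data per-test is what lets the suite cover the empty-filter, specific-filter, missing, and mismatched SQL-resource cases without a running database.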
#[tokio::test] + async fn test_reconcile_sql_resources_with_empty_filter_ignores_external() { + // Create a SQL resource that exists in the database but not in the infra map + let sql_resource = SqlResource { + name: "unmapped_view".to_string(), + database: Some("test".to_string()), + setup: vec!["CREATE VIEW unmapped_view AS SELECT * FROM source".to_string()], + teardown: vec!["DROP VIEW IF EXISTS unmapped_view".to_string()], + pulls_data_from: vec![], + pushes_data_to: vec![], + }; + + let mock_client = MockOlapClient { + tables: vec![], + sql_resources: vec![sql_resource.clone()], + }; + + let infra_map = InfrastructureMap::default(); + let project = create_test_project(); + + // Empty target_sql_resource_ids means no managed resources - external resources are filtered out + let reconciled = reconcile_with_reality( + &project, + &infra_map, + &HashSet::new(), + &HashSet::new(), + mock_client, + ) + .await + .unwrap(); + + // Empty filter = no managed resources, so unmapped SQL resource is NOT included (external) + assert_eq!(reconciled.sql_resources.len(), 0); + } + + #[tokio::test] + async fn test_reconcile_sql_resources_with_specific_filter() { + // Create two SQL resources in the database + let view_a = SqlResource { + name: "view_a".to_string(), + database: Some("test".to_string()), + setup: vec!["CREATE VIEW view_a AS SELECT * FROM table_a".to_string()], + teardown: vec!["DROP VIEW IF EXISTS view_a".to_string()], + pulls_data_from: vec![], + pushes_data_to: vec![], + }; + + let view_b = SqlResource { + name: "view_b".to_string(), + database: Some("test".to_string()), + setup: vec!["CREATE VIEW view_b AS SELECT * FROM table_b".to_string()], + teardown: vec!["DROP VIEW IF EXISTS view_b".to_string()], + pulls_data_from: vec![], + pushes_data_to: vec![], + }; + + let mock_client = MockOlapClient { + tables: vec![], + sql_resources: vec![view_a.clone(), view_b.clone()], + }; + + let infra_map = InfrastructureMap::default(); + let project = create_test_project(); + + // Only include view_a in the filter + let mut target_sql_resource_ids = HashSet::new(); + target_sql_resource_ids.insert(view_a.name.clone()); + + let reconciled = reconcile_with_reality( + &project, + &infra_map, + &HashSet::new(), + &target_sql_resource_ids, + mock_client, + ) + .await + .unwrap(); + + // Only view_a should be included, view_b should be filtered out + assert_eq!(reconciled.sql_resources.len(), 1); + assert!(reconciled.sql_resources.contains_key(&view_a.name)); + assert!(!reconciled.sql_resources.contains_key(&view_b.name)); + } + + #[tokio::test] + async fn test_reconcile_sql_resources_missing_and_mismatched() { + // Create SQL resource that's in the infra map + let existing_view = SqlResource { + name: "existing_view".to_string(), + database: None, + setup: vec!["CREATE VIEW existing_view AS SELECT * FROM old_table".to_string()], + teardown: vec!["DROP VIEW IF EXISTS existing_view".to_string()], + pulls_data_from: vec![], + pushes_data_to: vec![], + }; + + // Reality has a different version (mismatched) + let reality_view = SqlResource { + name: "existing_view".to_string(), + database: Some("test".to_string()), + setup: vec!["CREATE VIEW existing_view AS SELECT * FROM new_table".to_string()], + teardown: vec!["DROP VIEW IF EXISTS existing_view".to_string()], + pulls_data_from: vec![], + pushes_data_to: vec![], + }; + + let mock_client = MockOlapClient { + tables: vec![], + sql_resources: vec![reality_view.clone()], + }; + + // Create infra map with the existing view + let mut infra_map = 
InfrastructureMap::default(); + infra_map + .sql_resources + .insert(existing_view.name.clone(), existing_view.clone()); + + let project = create_test_project(); + let mut target_sql_resource_ids = HashSet::new(); + target_sql_resource_ids.insert(existing_view.name.clone()); + + let reconciled = reconcile_with_reality( + &project, + &infra_map, + &HashSet::new(), + &target_sql_resource_ids, + mock_client, + ) + .await + .unwrap(); + + // The view should be updated to match reality + assert_eq!(reconciled.sql_resources.len(), 1); + let reconciled_view = reconciled.sql_resources.get(&reality_view.name).unwrap(); + assert_eq!(reconciled_view.setup, reality_view.setup); + } } diff --git a/apps/framework-cli/src/framework/core/plan_validator.rs b/apps/framework-cli/src/framework/core/plan_validator.rs index 65f4862187..2d0260591e 100644 --- a/apps/framework-cli/src/framework/core/plan_validator.rs +++ b/apps/framework-cli/src/framework/core/plan_validator.rs @@ -10,11 +10,68 @@ pub enum ValidationError { #[error("Table validation failed: {0}")] TableValidation(String), + + #[error("Cluster validation failed: {0}")] + ClusterValidation(String), +} + +/// Validates that all tables with cluster_name reference clusters defined in the config +fn validate_cluster_references(project: &Project, plan: &InfraPlan) -> Result<(), ValidationError> { + let defined_clusters = project.clickhouse_config.clusters.as_ref(); + + // Get all cluster names from the defined clusters + let cluster_names: Vec = defined_clusters + .map(|clusters| clusters.iter().map(|c| c.name.clone()).collect()) + .unwrap_or_default(); + + // Check all tables in the target infrastructure map + for table in plan.target_infra_map.tables.values() { + if let Some(cluster_name) = &table.cluster_name { + // If table has a cluster_name, verify it's defined in the config + if cluster_names.is_empty() { + // No clusters defined in config but table references one + return Err(ValidationError::ClusterValidation(format!( + "Table '{}' references cluster '{}', but no clusters are defined in moose.config.toml.\n\ + \n\ + To fix this, add the cluster definition to your config:\n\ + \n\ + [[clickhouse_config.clusters]]\n\ + name = \"{}\"\n", + table.name, cluster_name, cluster_name + ))); + } else if !cluster_names.contains(cluster_name) { + // Table references a cluster that's not defined + return Err(ValidationError::ClusterValidation(format!( + "Table '{}' references cluster '{}', which is not defined in moose.config.toml.\n\ + \n\ + Available clusters: {}\n\ + \n\ + To fix this, either:\n\ + 1. Add the cluster to your config:\n\ + [[clickhouse_config.clusters]]\n\ + name = \"{}\"\n\ + \n\ + 2. Or change the table to use an existing cluster: {}\n", + table.name, + cluster_name, + cluster_names.join(", "), + cluster_name, + cluster_names.join(", ") + ))); + } + // Cluster is defined, continue validation + } + } + + Ok(()) } pub fn validate(project: &Project, plan: &InfraPlan) -> Result<(), ValidationError> { stream::validate_changes(project, &plan.changes.streaming_engine_changes)?; + // Validate cluster references + validate_cluster_references(project, plan)?; + // Check for validation errors in OLAP changes for change in &plan.changes.olap_changes { if let OlapChange::Table(TableChange::ValidationError { message, .. 
}) = change { @@ -24,3 +81,280 @@ pub fn validate(project: &Project, plan: &InfraPlan) -> Result<(), ValidationErr Ok(()) } + +#[cfg(test)] +mod tests { + use super::*; + use crate::framework::core::infrastructure::table::{Column, ColumnType, OrderBy, Table}; + use crate::framework::core::infrastructure_map::{ + InfrastructureMap, PrimitiveSignature, PrimitiveTypes, + }; + use crate::framework::core::partial_infrastructure_map::LifeCycle; + use crate::framework::core::plan::InfraPlan; + use crate::framework::versions::Version; + use crate::infrastructure::olap::clickhouse::{ + config::{ClickHouseConfig, ClusterConfig}, + queries::ClickhouseEngine, + }; + use crate::project::{Project, ProjectFeatures}; + use std::collections::HashMap; + use std::path::PathBuf; + + fn create_test_project(clusters: Option>) -> Project { + Project { + language: crate::framework::languages::SupportedLanguages::Typescript, + redpanda_config: crate::infrastructure::stream::kafka::models::KafkaConfig::default(), + clickhouse_config: ClickHouseConfig { + db_name: "local".to_string(), + user: "default".to_string(), + password: "".to_string(), + use_ssl: false, + host: "localhost".to_string(), + host_port: 18123, + native_port: 9000, + host_data_path: None, + additional_databases: vec![], + clusters, + }, + http_server_config: crate::cli::local_webserver::LocalWebserverConfig::default(), + redis_config: crate::infrastructure::redis::redis_client::RedisConfig::default(), + git_config: crate::utilities::git::GitConfig::default(), + temporal_config: + crate::infrastructure::orchestration::temporal::TemporalConfig::default(), + state_config: crate::project::StateConfig::default(), + migration_config: crate::project::MigrationConfig::default(), + language_project_config: crate::project::LanguageProjectConfig::default(), + project_location: PathBuf::from("/test"), + is_production: false, + supported_old_versions: HashMap::new(), + jwt: None, + authentication: crate::project::AuthenticationConfig::default(), + features: ProjectFeatures::default(), + load_infra: None, + typescript_config: crate::project::TypescriptConfig::default(), + source_dir: crate::project::default_source_dir(), + } + } + + fn create_test_table(name: &str, cluster_name: Option) -> Table { + Table { + name: name.to_string(), + columns: vec![Column { + name: "id".to_string(), + data_type: ColumnType::String, + required: true, + unique: false, + primary_key: true, + default: None, + annotations: vec![], + comment: None, + ttl: None, + }], + order_by: OrderBy::Fields(vec!["id".to_string()]), + partition_by: None, + sample_by: None, + engine: ClickhouseEngine::default(), + version: Some(Version::from_string("1.0.0".to_string())), + source_primitive: PrimitiveSignature { + name: name.to_string(), + primitive_type: PrimitiveTypes::DataModel, + }, + metadata: None, + life_cycle: LifeCycle::FullyManaged, + engine_params_hash: None, + table_settings: None, + indexes: vec![], + database: None, + table_ttl_setting: None, + cluster_name, + primary_key_expression: None, + } + } + + fn create_test_plan(tables: Vec
) -> InfraPlan { + let mut table_map = HashMap::new(); + for table in tables { + table_map.insert(format!("local_{}", table.name), table); + } + + InfraPlan { + target_infra_map: InfrastructureMap { + default_database: "local".to_string(), + tables: table_map, + topics: HashMap::new(), + api_endpoints: HashMap::new(), + views: HashMap::new(), + topic_to_table_sync_processes: HashMap::new(), + topic_to_topic_sync_processes: HashMap::new(), + function_processes: HashMap::new(), + block_db_processes: crate::framework::core::infrastructure::olap_process::OlapProcess {}, + consumption_api_web_server: crate::framework::core::infrastructure::consumption_webserver::ConsumptionApiWebServer {}, + orchestration_workers: HashMap::new(), + sql_resources: HashMap::new(), + workflows: HashMap::new(), + web_apps: HashMap::new(), + }, + changes: Default::default(), + } + } + + #[test] + fn test_validate_no_clusters_defined_but_table_references_one() { + let project = create_test_project(None); + let table = create_test_table("test_table", Some("test_cluster".to_string())); + let plan = create_test_plan(vec![table]); + + let result = validate(&project, &plan); + + assert!(result.is_err()); + match result { + Err(ValidationError::ClusterValidation(msg)) => { + assert!(msg.contains("test_table")); + assert!(msg.contains("test_cluster")); + assert!(msg.contains("no clusters are defined")); + } + _ => panic!("Expected ClusterValidation error"), + } + } + + #[test] + fn test_validate_table_references_undefined_cluster() { + let project = create_test_project(Some(vec![ + ClusterConfig { + name: "cluster_a".to_string(), + }, + ClusterConfig { + name: "cluster_b".to_string(), + }, + ])); + let table = create_test_table("test_table", Some("cluster_c".to_string())); + let plan = create_test_plan(vec![table]); + + let result = validate(&project, &plan); + + assert!(result.is_err()); + match result { + Err(ValidationError::ClusterValidation(msg)) => { + assert!(msg.contains("test_table")); + assert!(msg.contains("cluster_c")); + assert!(msg.contains("cluster_a")); + assert!(msg.contains("cluster_b")); + } + _ => panic!("Expected ClusterValidation error"), + } + } + + #[test] + fn test_validate_table_references_valid_cluster() { + let project = create_test_project(Some(vec![ClusterConfig { + name: "test_cluster".to_string(), + }])); + let table = create_test_table("test_table", Some("test_cluster".to_string())); + let plan = create_test_plan(vec![table]); + + let result = validate(&project, &plan); + + assert!(result.is_ok()); + } + + #[test] + fn test_validate_table_with_no_cluster_is_allowed() { + let project = create_test_project(Some(vec![ClusterConfig { + name: "test_cluster".to_string(), + }])); + let table = create_test_table("test_table", None); + let plan = create_test_plan(vec![table]); + + let result = validate(&project, &plan); + + assert!(result.is_ok()); + } + + #[test] + fn test_validate_multiple_tables_different_clusters() { + let project = create_test_project(Some(vec![ + ClusterConfig { + name: "cluster_a".to_string(), + }, + ClusterConfig { + name: "cluster_b".to_string(), + }, + ])); + let table1 = create_test_table("table1", Some("cluster_a".to_string())); + let table2 = create_test_table("table2", Some("cluster_b".to_string())); + let plan = create_test_plan(vec![table1, table2]); + + let result = validate(&project, &plan); + + assert!(result.is_ok()); + } + + #[test] + fn test_validate_empty_clusters_list() { + let project = create_test_project(Some(vec![])); + let table = 
create_test_table("test_table", Some("test_cluster".to_string())); + let plan = create_test_plan(vec![table]); + + let result = validate(&project, &plan); + + assert!(result.is_err()); + match result { + Err(ValidationError::ClusterValidation(msg)) => { + assert!(msg.contains("test_table")); + assert!(msg.contains("test_cluster")); + } + _ => panic!("Expected ClusterValidation error"), + } + } + + // Helper to create a table with a specific engine + fn create_table_with_engine( + name: &str, + cluster_name: Option, + engine: crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine, + ) -> Table { + Table { + name: name.to_string(), + columns: vec![Column { + name: "id".to_string(), + data_type: ColumnType::String, + required: true, + unique: false, + primary_key: true, + default: None, + annotations: vec![], + comment: None, + ttl: None, + }], + order_by: OrderBy::Fields(vec!["id".to_string()]), + partition_by: None, + sample_by: None, + engine, + version: Some(Version::from_string("1.0.0".to_string())), + source_primitive: PrimitiveSignature { + name: name.to_string(), + primitive_type: PrimitiveTypes::DataModel, + }, + metadata: None, + life_cycle: LifeCycle::FullyManaged, + engine_params_hash: None, + table_settings: None, + indexes: vec![], + database: None, + table_ttl_setting: None, + cluster_name, + primary_key_expression: None, + } + } + + #[test] + fn test_non_replicated_engine_without_cluster_succeeds() { + let project = create_test_project(None); + let table = create_table_with_engine("test_table", None, ClickhouseEngine::MergeTree); + let plan = create_test_plan(vec![table]); + + let result = validate(&project, &plan); + + assert!(result.is_ok()); + } +} diff --git a/apps/framework-cli/src/framework/core/primitive_map.rs b/apps/framework-cli/src/framework/core/primitive_map.rs index 4675988ae6..8f5245e1a6 100644 --- a/apps/framework-cli/src/framework/core/primitive_map.rs +++ b/apps/framework-cli/src/framework/core/primitive_map.rs @@ -1,8 +1,8 @@ -use log::warn; use std::{ collections::{HashMap, HashSet}, path::Path, }; +use tracing::warn; use walkdir::WalkDir; use crate::framework::data_model::config::DataModelConfig; @@ -135,7 +135,7 @@ impl PrimitiveMap { let mut primitive_map = PrimitiveMap::default(); for version in project.versions() { - log::debug!("Loading Versioned primitive map for version: {}", version); + tracing::debug!("Loading Versioned primitive map for version: {}", version); PrimitiveMap::load_versioned(project, &version, &mut primitive_map).await?; } @@ -150,7 +150,7 @@ impl PrimitiveMap { primitive_map.consumption = load_consumption(project).await?; - log::debug!("Loaded Versioned primitive map: {:?}", primitive_map); + tracing::debug!("Loaded Versioned primitive map: {:?}", primitive_map); primitive_map.validate()?; @@ -167,7 +167,7 @@ impl PrimitiveMap { primitive_map: &mut PrimitiveMap, ) -> Result<(), PrimitiveMapLoadingError> { let data_models_root = project.versioned_data_model_dir(version)?; - log::debug!("Loading data models from: {:?}", data_models_root); + tracing::debug!("Loading data models from: {:?}", data_models_root); for res_entry in WalkDir::new(data_models_root) { let entry = res_entry?; @@ -199,7 +199,7 @@ impl PrimitiveMap { ) -> Result, DataModelError> { let file_objects = data_model::parser::parse_data_model_file(file_path, version, project).await?; - log::debug!( + tracing::debug!( "Found the following data models: {:?} in path {:?}", file_objects.models, file_path @@ -232,7 +232,7 @@ impl PrimitiveMap { } } - log::debug!( 
+ tracing::debug!( "Data Models matched with configuration: {:?} from file: {:?}", indexed_models, file_path diff --git a/apps/framework-cli/src/framework/core/state_storage.rs b/apps/framework-cli/src/framework/core/state_storage.rs index 794f14f5cf..077bcb518a 100644 --- a/apps/framework-cli/src/framework/core/state_storage.rs +++ b/apps/framework-cli/src/framework/core/state_storage.rs @@ -13,10 +13,10 @@ use crate::utilities::machine_id::get_or_create_machine_id; use anyhow::{Context, Result}; use async_trait::async_trait; use chrono::{DateTime, Duration, Utc}; -use log::{debug, info, warn}; use protobuf::Message; use serde::{Deserialize, Serialize}; use std::sync::Arc; +use tracing::{debug, info, warn}; /// Lock data for migration coordination #[derive(Debug, Clone, Serialize, Deserialize)] diff --git a/apps/framework-cli/src/framework/data_model/config.rs b/apps/framework-cli/src/framework/data_model/config.rs index bd222ecc16..07f6f38d86 100644 --- a/apps/framework-cli/src/framework/data_model/config.rs +++ b/apps/framework-cli/src/framework/data_model/config.rs @@ -6,10 +6,10 @@ use crate::framework::python::datamodel_config::execute_python_model_file_for_co use crate::framework::typescript::export_collectors::get_data_model_configs; use crate::project::Project; use crate::utilities::_true; -use log::info; use serde::Deserialize; use serde::Serialize; use std::ffi::OsStr; +use tracing::info; pub type ConfigIdentifier = String; diff --git a/apps/framework-cli/src/framework/data_model/model.rs b/apps/framework-cli/src/framework/data_model/model.rs index f8918853d1..da20ff2dd8 100644 --- a/apps/framework-cli/src/framework/data_model/model.rs +++ b/apps/framework-cli/src/framework/data_model/model.rs @@ -36,14 +36,14 @@ impl DataModel { OrderBy::Fields(self.primary_key_columns()) }; - let engine = - self.config - .storage - .deduplicate - .then_some(ClickhouseEngine::ReplacingMergeTree { - ver: None, - is_deleted: None, - }); + let engine = if self.config.storage.deduplicate { + ClickhouseEngine::ReplacingMergeTree { + ver: None, + is_deleted: None, + } + } else { + ClickhouseEngine::MergeTree + }; // Create the table first, then compute the combined hash that includes database let mut table = Table { @@ -70,6 +70,8 @@ impl DataModel { indexes: vec![], database: None, // Database defaults to global config table_ttl_setting: None, + cluster_name: None, + primary_key_expression: None, }; // Compute hash that includes both engine params and database diff --git a/apps/framework-cli/src/framework/data_model/parser.rs b/apps/framework-cli/src/framework/data_model/parser.rs index 9d89291b2c..77a34c4601 100644 --- a/apps/framework-cli/src/framework/data_model/parser.rs +++ b/apps/framework-cli/src/framework/data_model/parser.rs @@ -4,9 +4,9 @@ use crate::{ framework::{core::infrastructure::table::DataEnum, python, typescript}, project::Project, }; -use log::info; use serde::Deserialize; use std::path::Path; +use tracing::info; #[derive(Debug, thiserror::Error)] #[error("Failed to parse the data model file")] diff --git a/apps/framework-cli/src/framework/python/blocks.rs b/apps/framework-cli/src/framework/python/blocks.rs index d5a9f499fa..daec94cdbf 100644 --- a/apps/framework-cli/src/framework/python/blocks.rs +++ b/apps/framework-cli/src/framework/python/blocks.rs @@ -1,7 +1,7 @@ -use log::{error, info}; use std::path::Path; use tokio::io::{AsyncBufReadExt, BufReader}; use tokio::process::Child; +use tracing::{error, info}; use super::executor; use 
crate::framework::blocks::model::BlocksError; diff --git a/apps/framework-cli/src/framework/python/consumption.rs b/apps/framework-cli/src/framework/python/consumption.rs index 5f84b1615a..4a31bedc6b 100644 --- a/apps/framework-cli/src/framework/python/consumption.rs +++ b/apps/framework-cli/src/framework/python/consumption.rs @@ -5,11 +5,11 @@ use crate::infrastructure::olap::clickhouse::config::ClickHouseConfig; use crate::infrastructure::processes::consumption_registry::ConsumptionError; use crate::project::{JwtConfig, Project}; use crate::utilities::constants::{CONSUMPTION_WRAPPER_PACKAGE_NAME, UTILS_WRAPPER_PACKAGE_NAME}; -use log::{error, info}; use std::fs; use std::path::Path; use tokio::io::{AsyncBufReadExt, BufReader}; use tokio::process::Child; +use tracing::{error, info}; use super::executor; diff --git a/apps/framework-cli/src/framework/python/datamodel_config.rs b/apps/framework-cli/src/framework/python/datamodel_config.rs index 8207f7d097..b540dddeee 100644 --- a/apps/framework-cli/src/framework/python/datamodel_config.rs +++ b/apps/framework-cli/src/framework/python/datamodel_config.rs @@ -1,10 +1,10 @@ -use log::info; use serde::Deserialize; use serde::Serialize; use std::{ collections::HashMap, path::{absolute, Path}, }; +use tracing::info; use crate::framework::core::partial_infrastructure_map::{ DmV2LoadingError, PartialInfrastructureMap, diff --git a/apps/framework-cli/src/framework/python/generate.rs b/apps/framework-cli/src/framework/python/generate.rs index b97842eac8..0416662b27 100644 --- a/apps/framework-cli/src/framework/python/generate.rs +++ b/apps/framework-cli/src/framework/python/generate.rs @@ -13,6 +13,7 @@ use std::fmt::Write; use std::sync::LazyLock; use crate::infrastructure::olap::clickhouse::extract_version_from_table_name; +use crate::infrastructure::olap::clickhouse::queries::BufferEngine; /// Language-agnostic sanitization: replace common separators with spaces to create word boundaries. 
pub use ident::sanitize_identifier; @@ -632,19 +633,8 @@ pub fn tables_to_python(tables: &[Table], life_cycle: Option) -> Stri // Generate model classes for table in tables { writeln!(output, "class {}(BaseModel):", table.name).unwrap(); - - let primary_key = table - .columns - .iter() - .filter_map(|column| { - if column.primary_key { - Some(column.name.to_string()) - } else { - None - } - }) - .collect::>(); - let can_use_key_wrapping = table.order_by.starts_with_fields(&primary_key); + // list_tables sets primary_key_expression to Some if Key wrapping is insufficient to represent the PK + let can_use_key_wrapping = table.primary_key_expression.is_none(); for column in &table.columns { let type_str = map_column_type_to_python( @@ -724,6 +714,11 @@ pub fn tables_to_python(tables: &[Table], life_cycle: Option) -> Stri ) .unwrap(); writeln!(output, " {order_by_spec},").unwrap(); + + if let Some(ref pk_expr) = table.primary_key_expression { + // Use the explicit primary_key_expression directly + writeln!(output, " primary_key_expression={:?},", pk_expr).unwrap(); + } if let Some(partition_by) = &table.partition_by { writeln!(output, " partition_by={:?},", partition_by).unwrap(); } @@ -744,209 +739,228 @@ pub fn tables_to_python(tables: &[Table], life_cycle: Option) -> Stri if let Some(ttl_expr) = &table.table_ttl_setting { writeln!(output, " ttl={:?},", ttl_expr).unwrap(); } - if let Some(engine) = &table.engine { - match engine { - crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::S3Queue { - s3_path, - format, - compression, - headers, - aws_access_key_id, - aws_secret_access_key, - } => { - // Generate S3Queue configuration object - writeln!(output, " engine=S3QueueEngine(").unwrap(); - writeln!(output, " s3_path={:?},", s3_path).unwrap(); - writeln!(output, " format={:?},", format).unwrap(); - if let Some(compression) = compression { - writeln!(output, " compression={:?},", compression).unwrap(); - } - if let Some(key_id) = aws_access_key_id { - writeln!(output, " aws_access_key_id={:?},", key_id).unwrap(); - } - if let Some(secret) = aws_secret_access_key { - writeln!(output, " aws_secret_access_key={:?},", secret).unwrap(); - } - if let Some(headers) = headers { - write!(output, " headers={{").unwrap(); - for (i, (key, value)) in headers.iter().enumerate() { - if i > 0 { write!(output, ",").unwrap(); } - write!(output, " {:?}: {:?}", key, value).unwrap(); - } - writeln!(output, " }},").unwrap(); - } - writeln!(output, " ),").unwrap(); + match &table.engine { + crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::S3Queue { + s3_path, + format, + compression, + headers, + aws_access_key_id, + aws_secret_access_key, + } => { + // Generate S3Queue configuration object + writeln!(output, " engine=S3QueueEngine(").unwrap(); + writeln!(output, " s3_path={:?},", s3_path).unwrap(); + writeln!(output, " format={:?},", format).unwrap(); + if let Some(compression) = compression { + writeln!(output, " compression={:?},", compression).unwrap(); } - crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::MergeTree => { - writeln!(output, " engine=MergeTreeEngine(),").unwrap(); + if let Some(key_id) = aws_access_key_id { + writeln!(output, " aws_access_key_id={:?},", key_id).unwrap(); } - crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::ReplacingMergeTree { ver, is_deleted } => { - // Emit ReplacingMergeTreeEngine with parameters if present - write!(output, " engine=ReplacingMergeTreeEngine(").unwrap(); - if let Some(ver_col) = ver { - 
write!(output, "ver=\"{}\"", ver_col).unwrap(); - if is_deleted.is_some() { - write!(output, ", ").unwrap(); - } + if let Some(secret) = aws_secret_access_key { + writeln!(output, " aws_secret_access_key={:?},", secret).unwrap(); + } + if let Some(headers) = headers { + write!(output, " headers={{").unwrap(); + for (i, (key, value)) in headers.iter().enumerate() { + if i > 0 { write!(output, ",").unwrap(); } + write!(output, " {:?}: {:?}", key, value).unwrap(); } - if let Some(is_deleted_col) = is_deleted { - write!(output, "is_deleted=\"{}\"", is_deleted_col).unwrap(); + writeln!(output, " }},").unwrap(); + } + writeln!(output, " ),").unwrap(); + } + crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::MergeTree => { + writeln!(output, " engine=MergeTreeEngine(),").unwrap(); + } + crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::ReplacingMergeTree { ver, is_deleted } => { + // Emit ReplacingMergeTreeEngine with parameters if present + write!(output, " engine=ReplacingMergeTreeEngine(").unwrap(); + if let Some(ver_col) = ver { + write!(output, "ver=\"{}\"", ver_col).unwrap(); + if is_deleted.is_some() { + write!(output, ", ").unwrap(); } - writeln!(output, "),").unwrap(); } - crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::AggregatingMergeTree => { - writeln!(output, " engine=AggregatingMergeTreeEngine(),").unwrap(); + if let Some(is_deleted_col) = is_deleted { + write!(output, "is_deleted=\"{}\"", is_deleted_col).unwrap(); } - crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::SummingMergeTree { columns } => { - write!(output, " engine=SummingMergeTreeEngine(").unwrap(); - if let Some(cols) = columns { - if !cols.is_empty() { - write!(output, "columns={:?}", cols).unwrap(); - } + writeln!(output, "),").unwrap(); + } + crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::AggregatingMergeTree => { + writeln!(output, " engine=AggregatingMergeTreeEngine(),").unwrap(); + } + crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::SummingMergeTree { columns } => { + write!(output, " engine=SummingMergeTreeEngine(").unwrap(); + if let Some(cols) = columns { + if !cols.is_empty() { + write!(output, "columns={:?}", cols).unwrap(); } - writeln!(output, "),").unwrap(); } - crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::ReplicatedMergeTree { - keeper_path, - replica_name, - } => { - write!(output, " engine=ReplicatedMergeTreeEngine(").unwrap(); - if let (Some(path), Some(name)) = (keeper_path, replica_name) { - write!(output, "keeper_path={:?}, replica_name={:?}", path, name).unwrap(); - } - writeln!(output, "),").unwrap(); + writeln!(output, "),").unwrap(); + } + crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::ReplicatedMergeTree { + keeper_path, + replica_name, + } => { + write!(output, " engine=ReplicatedMergeTreeEngine(").unwrap(); + if let (Some(path), Some(name)) = (keeper_path, replica_name) { + write!(output, "keeper_path={:?}, replica_name={:?}", path, name).unwrap(); } - crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::ReplicatedReplacingMergeTree { - keeper_path, - replica_name, - ver, - is_deleted, - } => { - write!(output, " engine=ReplicatedReplacingMergeTreeEngine(").unwrap(); - let mut params = vec![]; - if let (Some(path), Some(name)) = (keeper_path, replica_name) { - params.push(format!("keeper_path={:?}, replica_name={:?}", path, name)); - } - if let Some(v) = ver { - params.push(format!("ver={:?}", v)); - } - if let Some(d) = 
is_deleted { - params.push(format!("is_deleted={:?}", d)); - } - write!(output, "{}", params.join(", ")).unwrap(); - writeln!(output, "),").unwrap(); + writeln!(output, "),").unwrap(); + } + crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::ReplicatedReplacingMergeTree { + keeper_path, + replica_name, + ver, + is_deleted, + } => { + write!(output, " engine=ReplicatedReplacingMergeTreeEngine(").unwrap(); + let mut params = vec![]; + if let (Some(path), Some(name)) = (keeper_path, replica_name) { + params.push(format!("keeper_path={:?}, replica_name={:?}", path, name)); } - crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::ReplicatedAggregatingMergeTree { - keeper_path, - replica_name, - } => { - write!(output, " engine=ReplicatedAggregatingMergeTreeEngine(").unwrap(); - if let (Some(path), Some(name)) = (keeper_path, replica_name) { - write!(output, "keeper_path={:?}, replica_name={:?}", path, name).unwrap(); - } - writeln!(output, "),").unwrap(); + if let Some(v) = ver { + params.push(format!("ver={:?}", v)); } - crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::ReplicatedSummingMergeTree { - keeper_path, - replica_name, - columns, - } => { - write!(output, " engine=ReplicatedSummingMergeTreeEngine(").unwrap(); - let mut params = vec![]; - if let (Some(path), Some(name)) = (keeper_path, replica_name) { - params.push(format!("keeper_path={:?}, replica_name={:?}", path, name)); - } - if let Some(cols) = columns { - if !cols.is_empty() { - params.push(format!("columns={:?}", cols)); - } - } - write!(output, "{}", params.join(", ")).unwrap(); - writeln!(output, "),").unwrap(); + if let Some(d) = is_deleted { + params.push(format!("is_deleted={:?}", d)); } - crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::S3 { - path, - format, - aws_access_key_id, - aws_secret_access_key, - compression, - partition_strategy, - partition_columns_in_data_file, - } => { - writeln!(output, " engine=S3Engine(").unwrap(); - writeln!(output, " path={:?},", path).unwrap(); - writeln!(output, " format={:?},", format).unwrap(); - if let Some(key_id) = aws_access_key_id { - writeln!(output, " aws_access_key_id={:?},", key_id).unwrap(); - } - if let Some(secret) = aws_secret_access_key { - writeln!(output, " aws_secret_access_key={:?},", secret).unwrap(); - } - if let Some(comp) = compression { - writeln!(output, " compression={:?},", comp).unwrap(); - } - if let Some(ps) = partition_strategy { - writeln!(output, " partition_strategy={:?},", ps).unwrap(); - } - if let Some(pc) = partition_columns_in_data_file { - writeln!(output, " partition_columns_in_data_file={:?},", pc).unwrap(); - } - writeln!(output, " ),").unwrap(); + write!(output, "{}", params.join(", ")).unwrap(); + writeln!(output, "),").unwrap(); + } + crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::ReplicatedAggregatingMergeTree { + keeper_path, + replica_name, + } => { + write!(output, " engine=ReplicatedAggregatingMergeTreeEngine(").unwrap(); + if let (Some(path), Some(name)) = (keeper_path, replica_name) { + write!(output, "keeper_path={:?}, replica_name={:?}", path, name).unwrap(); } - crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::Buffer { - target_database, - target_table, - num_layers, - min_time, - max_time, - min_rows, - max_rows, - min_bytes, - max_bytes, - flush_time, - flush_rows, - flush_bytes, - } => { - writeln!(output, " engine=BufferEngine(").unwrap(); - writeln!(output, " target_database={:?},", target_database).unwrap(); - 
writeln!(output, " target_table={:?},", target_table).unwrap(); - writeln!(output, " num_layers={},", num_layers).unwrap(); - writeln!(output, " min_time={},", min_time).unwrap(); - writeln!(output, " max_time={},", max_time).unwrap(); - writeln!(output, " min_rows={},", min_rows).unwrap(); - writeln!(output, " max_rows={},", max_rows).unwrap(); - writeln!(output, " min_bytes={},", min_bytes).unwrap(); - writeln!(output, " max_bytes={},", max_bytes).unwrap(); - if let Some(ft) = flush_time { - writeln!(output, " flush_time={},", ft).unwrap(); - } - if let Some(fr) = flush_rows { - writeln!(output, " flush_rows={},", fr).unwrap(); - } - if let Some(fb) = flush_bytes { - writeln!(output, " flush_bytes={},", fb).unwrap(); - } - writeln!(output, " ),").unwrap(); + writeln!(output, "),").unwrap(); + } + crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::ReplicatedSummingMergeTree { + keeper_path, + replica_name, + columns, + } => { + write!(output, " engine=ReplicatedSummingMergeTreeEngine(").unwrap(); + let mut params = vec![]; + if let (Some(path), Some(name)) = (keeper_path, replica_name) { + params.push(format!("keeper_path={:?}, replica_name={:?}", path, name)); } - crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::Distributed { - cluster, - target_database, - target_table, - sharding_key, - policy_name, - } => { - writeln!(output, " engine=DistributedEngine(").unwrap(); - writeln!(output, " cluster={:?},", cluster).unwrap(); - writeln!(output, " target_database={:?},", target_database).unwrap(); - writeln!(output, " target_table={:?},", target_table).unwrap(); - if let Some(key) = sharding_key { - writeln!(output, " sharding_key={:?},", key).unwrap(); + if let Some(cols) = columns { + if !cols.is_empty() { + params.push(format!("columns={:?}", cols)); } - if let Some(policy) = policy_name { - writeln!(output, " policy_name={:?},", policy).unwrap(); - } - writeln!(output, " ),").unwrap(); } + write!(output, "{}", params.join(", ")).unwrap(); + writeln!(output, "),").unwrap(); + } + crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::S3 { + path, + format, + aws_access_key_id, + aws_secret_access_key, + compression, + partition_strategy, + partition_columns_in_data_file, + } => { + writeln!(output, " engine=S3Engine(").unwrap(); + writeln!(output, " path={:?},", path).unwrap(); + writeln!(output, " format={:?},", format).unwrap(); + if let Some(key_id) = aws_access_key_id { + writeln!(output, " aws_access_key_id={:?},", key_id).unwrap(); + } + if let Some(secret) = aws_secret_access_key { + writeln!(output, " aws_secret_access_key={:?},", secret).unwrap(); + } + if let Some(comp) = compression { + writeln!(output, " compression={:?},", comp).unwrap(); + } + if let Some(ps) = partition_strategy { + writeln!(output, " partition_strategy={:?},", ps).unwrap(); + } + if let Some(pc) = partition_columns_in_data_file { + writeln!(output, " partition_columns_in_data_file={:?},", pc).unwrap(); + } + writeln!(output, " ),").unwrap(); + } + crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::Buffer(BufferEngine { + target_database, + target_table, + num_layers, + min_time, + max_time, + min_rows, + max_rows, + min_bytes, + max_bytes, + flush_time, + flush_rows, + flush_bytes, + }) => { + writeln!(output, " engine=BufferEngine(").unwrap(); + writeln!(output, " target_database={:?},", target_database).unwrap(); + writeln!(output, " target_table={:?},", target_table).unwrap(); + writeln!(output, " num_layers={},", num_layers).unwrap(); + 
writeln!(output, " min_time={},", min_time).unwrap(); + writeln!(output, " max_time={},", max_time).unwrap(); + writeln!(output, " min_rows={},", min_rows).unwrap(); + writeln!(output, " max_rows={},", max_rows).unwrap(); + writeln!(output, " min_bytes={},", min_bytes).unwrap(); + writeln!(output, " max_bytes={},", max_bytes).unwrap(); + if let Some(ft) = flush_time { + writeln!(output, " flush_time={},", ft).unwrap(); + } + if let Some(fr) = flush_rows { + writeln!(output, " flush_rows={},", fr).unwrap(); + } + if let Some(fb) = flush_bytes { + writeln!(output, " flush_bytes={},", fb).unwrap(); + } + writeln!(output, " ),").unwrap(); + } + crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::Distributed { + cluster, + target_database, + target_table, + sharding_key, + policy_name, + } => { + writeln!(output, " engine=DistributedEngine(").unwrap(); + writeln!(output, " cluster={:?},", cluster).unwrap(); + writeln!(output, " target_database={:?},", target_database).unwrap(); + writeln!(output, " target_table={:?},", target_table).unwrap(); + if let Some(key) = sharding_key { + writeln!(output, " sharding_key={:?},", key).unwrap(); + } + if let Some(policy) = policy_name { + writeln!(output, " policy_name={:?},", policy).unwrap(); + } + writeln!(output, " ),").unwrap(); + } + crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::IcebergS3 { + path, + format, + aws_access_key_id, + aws_secret_access_key, + compression, + } => { + writeln!(output, " engine=IcebergS3Engine(").unwrap(); + writeln!(output, " path={:?},", path).unwrap(); + writeln!(output, " format={:?},", format).unwrap(); + if let Some(key_id) = aws_access_key_id { + writeln!(output, " aws_access_key_id={:?},", key_id).unwrap(); + } + if let Some(secret) = aws_secret_access_key { + writeln!(output, " aws_secret_access_key={:?},", secret).unwrap(); + } + if let Some(comp) = compression { + writeln!(output, " compression={:?},", comp).unwrap(); + } + writeln!(output, " ),").unwrap(); } } if let Some(version) = &table.version { @@ -1052,7 +1066,7 @@ mod tests { order_by: OrderBy::Fields(vec!["primary_key".to_string()]), partition_by: None, sample_by: None, - engine: Some(ClickhouseEngine::MergeTree), + engine: ClickhouseEngine::MergeTree, version: None, source_primitive: PrimitiveSignature { name: "Foo".to_string(), @@ -1065,6 +1079,8 @@ mod tests { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, + primary_key_expression: None, }]; let result = tables_to_python(&tables, None); @@ -1145,7 +1161,7 @@ foo_table = OlapTable[Foo]("Foo", OlapConfig( order_by: OrderBy::Fields(vec!["id".to_string()]), partition_by: None, sample_by: None, - engine: Some(ClickhouseEngine::MergeTree), + engine: ClickhouseEngine::MergeTree, version: None, source_primitive: PrimitiveSignature { name: "NestedArray".to_string(), @@ -1158,6 +1174,8 @@ foo_table = OlapTable[Foo]("Foo", OlapConfig( indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, + primary_key_expression: None, }]; let result = tables_to_python(&tables, None); @@ -1263,7 +1281,7 @@ nested_array_table = OlapTable[NestedArray]("NestedArray", OlapConfig( order_by: OrderBy::Fields(vec!["id".to_string()]), partition_by: None, sample_by: None, - engine: Some(ClickhouseEngine::MergeTree), + engine: ClickhouseEngine::MergeTree, version: None, source_primitive: PrimitiveSignature { name: "User".to_string(), @@ -1276,6 +1294,8 @@ nested_array_table = OlapTable[NestedArray]("NestedArray", OlapConfig( indexes: vec![], 
database: None, table_ttl_setting: None, + cluster_name: None, + primary_key_expression: None, }]; let result = tables_to_python(&tables, None); @@ -1330,14 +1350,14 @@ user_table = OlapTable[User]("User", OlapConfig( order_by: OrderBy::Fields(vec!["id".to_string()]), partition_by: None, sample_by: None, - engine: Some(ClickhouseEngine::S3Queue { + engine: ClickhouseEngine::S3Queue { s3_path: "s3://bucket/path".to_string(), format: "JSONEachRow".to_string(), compression: Some("gzip".to_string()), headers: None, aws_access_key_id: None, aws_secret_access_key: None, - }), + }, version: None, source_primitive: PrimitiveSignature { name: "Events".to_string(), @@ -1354,6 +1374,8 @@ user_table = OlapTable[User]("User", OlapConfig( indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, + primary_key_expression: None, }]; let result = tables_to_python(&tables, None); @@ -1385,10 +1407,10 @@ user_table = OlapTable[User]("User", OlapConfig( order_by: OrderBy::Fields(vec!["id".to_string()]), partition_by: None, sample_by: None, - engine: Some(ClickhouseEngine::ReplacingMergeTree { + engine: ClickhouseEngine::ReplacingMergeTree { ver: None, is_deleted: None, - }), + }, version: None, source_primitive: PrimitiveSignature { name: "UserData".to_string(), @@ -1411,6 +1433,8 @@ user_table = OlapTable[User]("User", OlapConfig( indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, + primary_key_expression: None, }]; let result = tables_to_python(&tables, None); @@ -1463,10 +1487,10 @@ user_table = OlapTable[User]("User", OlapConfig( order_by: OrderBy::Fields(vec!["id".to_string()]), partition_by: None, sample_by: None, - engine: Some(ClickhouseEngine::ReplacingMergeTree { + engine: ClickhouseEngine::ReplacingMergeTree { ver: Some("version".to_string()), is_deleted: Some("is_deleted".to_string()), - }), + }, version: None, source_primitive: PrimitiveSignature { name: "UserData".to_string(), @@ -1479,6 +1503,8 @@ user_table = OlapTable[User]("User", OlapConfig( indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, + primary_key_expression: None, }]; let result = tables_to_python(&tables, None); @@ -1537,7 +1563,7 @@ user_table = OlapTable[User]("User", OlapConfig( order_by: OrderBy::Fields(vec!["id".to_string()]), partition_by: None, sample_by: None, - engine: Some(ClickhouseEngine::MergeTree), + engine: ClickhouseEngine::MergeTree, version: None, source_primitive: PrimitiveSignature { name: "Location".to_string(), @@ -1550,6 +1576,8 @@ user_table = OlapTable[User]("User", OlapConfig( indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, + primary_key_expression: None, }]; let result = tables_to_python(&tables, None); @@ -1619,7 +1647,7 @@ user_table = OlapTable[User]("User", OlapConfig( order_by: OrderBy::Fields(vec!["id".to_string(), "timestamp".to_string()]), partition_by: None, sample_by: None, - engine: Some(ClickhouseEngine::MergeTree), + engine: ClickhouseEngine::MergeTree, version: None, source_primitive: PrimitiveSignature { name: "Events".to_string(), @@ -1632,6 +1660,8 @@ user_table = OlapTable[User]("User", OlapConfig( indexes: vec![], database: None, table_ttl_setting: Some("timestamp + INTERVAL 90 DAY DELETE".to_string()), + cluster_name: None, + primary_key_expression: None, }]; let result = tables_to_python(&tables, None); @@ -1663,7 +1693,7 @@ user_table = OlapTable[User]("User", OlapConfig( order_by: OrderBy::Fields(vec!["id".to_string()]), partition_by: None, sample_by: None, - engine: 
Some(ClickhouseEngine::MergeTree), + engine: ClickhouseEngine::MergeTree, version: None, source_primitive: PrimitiveSignature { name: "IndexPy".to_string(), @@ -1696,6 +1726,8 @@ user_table = OlapTable[User]("User", OlapConfig( ], database: None, table_ttl_setting: None, + cluster_name: None, + primary_key_expression: None, }]; let result = tables_to_python(&tables, None); @@ -1748,7 +1780,7 @@ user_table = OlapTable[User]("User", OlapConfig( order_by: OrderBy::Fields(vec!["id".to_string()]), partition_by: None, sample_by: None, - engine: Some(ClickhouseEngine::MergeTree), + engine: ClickhouseEngine::MergeTree, version: None, source_primitive: PrimitiveSignature { name: "JsonTest".to_string(), @@ -1760,6 +1792,8 @@ user_table = OlapTable[User]("User", OlapConfig( table_settings: None, indexes: vec![], table_ttl_setting: None, + cluster_name: None, + primary_key_expression: None, }]; let result = tables_to_python(&tables, None); @@ -1800,7 +1834,7 @@ user_table = OlapTable[User]("User", OlapConfig( order_by: OrderBy::Fields(vec!["id".to_string()]), partition_by: None, sample_by: None, - engine: Some(ClickhouseEngine::MergeTree), + engine: ClickhouseEngine::MergeTree, version: None, source_primitive: PrimitiveSignature { name: "ExternalData".to_string(), @@ -1813,6 +1847,8 @@ user_table = OlapTable[User]("User", OlapConfig( indexes: vec![], database: Some("analytics_db".to_string()), table_ttl_setting: None, + cluster_name: None, + primary_key_expression: None, }]; let result = tables_to_python(&tables, None); diff --git a/apps/framework-cli/src/framework/python/parser.rs b/apps/framework-cli/src/framework/python/parser.rs index feec1e9fd1..5d0b23b2e8 100644 --- a/apps/framework-cli/src/framework/python/parser.rs +++ b/apps/framework-cli/src/framework/python/parser.rs @@ -24,13 +24,13 @@ use crate::{ project::python_project::PythonProject, utilities::constants::REQUIREMENTS_TXT, }; -use log::warn; use rustpython_parser::{ ast::{self, Constant, Expr, ExprName, Identifier, Keyword, Stmt, StmtClassDef}, Parse, }; use std::fmt::Debug; use std::path::{Path, PathBuf}; +use tracing::warn; use crate::framework::core::infrastructure::table::{ Column, ColumnType, DataEnum as FrameworkEnum, FloatType, IntType, Nested, diff --git a/apps/framework-cli/src/framework/python/scripts_worker.rs b/apps/framework-cli/src/framework/python/scripts_worker.rs index 2472d094c7..b21d11c5f1 100644 --- a/apps/framework-cli/src/framework/python/scripts_worker.rs +++ b/apps/framework-cli/src/framework/python/scripts_worker.rs @@ -1,7 +1,7 @@ -use log::{error, info, warn}; use std::fs; use tokio::io::{AsyncBufReadExt, BufReader}; use tokio::process::Child; +use tracing::{error, info, warn}; use crate::cli::display::{show_message_wrapper, Message, MessageType}; use crate::project::{Project, ProjectFileError}; diff --git a/apps/framework-cli/src/framework/python/streaming.rs b/apps/framework-cli/src/framework/python/streaming.rs index bf662da1d2..c54608f175 100644 --- a/apps/framework-cli/src/framework/python/streaming.rs +++ b/apps/framework-cli/src/framework/python/streaming.rs @@ -75,13 +75,13 @@ pub fn run( tokio::spawn(async move { while let Ok(Some(line)) = stdout_reader.next_line().await { - log::info!("{}", line); + tracing::info!("{}", line); } }); tokio::spawn(async move { while let Ok(Some(line)) = stderr_reader.next_line().await { - log::error!("{}", line); + tracing::error!("{}", line); } }); diff --git a/apps/framework-cli/src/framework/scripts/executor.rs b/apps/framework-cli/src/framework/scripts/executor.rs 
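Tying together the model.rs and generate.rs hunks above: table.engine is no longer an Option (plain MergeTree is now the default when deduplication is off), and Buffer parameters are carried by a BufferEngine struct wrapped in the enum variant rather than inline variant fields. A minimal sketch of how a call site inside the crate might read those parameters after this change (describe_engine and the chosen field subset are illustrative):

    use crate::infrastructure::olap::clickhouse::queries::{BufferEngine, ClickhouseEngine};

    // Sketch: summarize the engine variants touched by this refactor.
    fn describe_engine(engine: &ClickhouseEngine) -> String {
        match engine {
            // Buffer fields are destructured from the wrapped BufferEngine struct,
            // not from fields declared inline on the enum variant.
            ClickhouseEngine::Buffer(BufferEngine {
                target_table,
                num_layers,
                ..
            }) => format!("Buffer -> {} ({} layers)", target_table, num_layers),
            // MergeTree is the default engine for data models without deduplication.
            ClickhouseEngine::MergeTree => "MergeTree".to_string(),
            _ => "other engine".to_string(),
        }
    }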
index 3e713d485e..23557cb8b8 100644 --- a/apps/framework-cli/src/framework/scripts/executor.rs +++ b/apps/framework-cli/src/framework/scripts/executor.rs @@ -1,8 +1,8 @@ use anyhow::Result; -use log::info; use serde::Serialize; use sha2::{Digest, Sha256}; use std::collections::{HashMap, HashSet}; +use tracing::info; use super::{config::WorkflowConfig, Workflow}; use crate::framework::{ @@ -156,7 +156,7 @@ pub(crate) async fn execute_scheduled_workflows( match workflow.start(&project.temporal_config, None).await { Ok(_) => info!("Auto-started workflow: {}", workflow.name), - Err(e) => log::error!("Failed to auto-start workflow {}: {}", workflow.name, e), + Err(e) => tracing::error!("Failed to auto-start workflow {}: {}", workflow.name, e), } } } @@ -165,7 +165,7 @@ async fn list_running_workflows(project: &Project) -> HashSet { let client_manager = match TemporalClientManager::new_validate(&project.temporal_config, true) { Ok(manager) => manager, Err(e) => { - log::error!("Failed to create Temporal client manager: {}", e); + tracing::error!("Failed to create Temporal client manager: {}", e); return HashSet::new(); } }; @@ -195,7 +195,7 @@ async fn list_running_workflows(project: &Project) -> HashSet { .map(|execution_info| execution_info.workflow_id) .collect(), Err(e) => { - log::error!("Failed to list running workflows: {}", e); + tracing::error!("Failed to list running workflows: {}", e); HashSet::new() } } diff --git a/apps/framework-cli/src/framework/streaming/loader.rs b/apps/framework-cli/src/framework/streaming/loader.rs index 2ad560abe1..6d31bfe306 100644 --- a/apps/framework-cli/src/framework/streaming/loader.rs +++ b/apps/framework-cli/src/framework/streaming/loader.rs @@ -7,10 +7,10 @@ use crate::{ project::Project, utilities::constants::{PY_FLOW_FILE, TS_FLOW_FILE}, }; -use log::{debug, info, warn}; use regex::{Captures, Regex}; use std::ffi::OsStr; use std::{fs, path::Path}; +use tracing::{debug, info, warn}; const MIGRATION_REGEX: &str = r"^([a-zA-Z0-9_]+)_migrate__([0-9_]+)__(([a-zA-Z0-9_]+)__)?([0-9_]+)$"; diff --git a/apps/framework-cli/src/framework/typescript/blocks.rs b/apps/framework-cli/src/framework/typescript/blocks.rs index 5d3fcc84af..c1434afaaa 100644 --- a/apps/framework-cli/src/framework/typescript/blocks.rs +++ b/apps/framework-cli/src/framework/typescript/blocks.rs @@ -1,7 +1,7 @@ -use log::{error, info}; use std::path::Path; use tokio::io::{AsyncBufReadExt, BufReader}; use tokio::process::Child; +use tracing::{error, info}; use super::bin; use crate::framework::blocks::model::BlocksError; diff --git a/apps/framework-cli/src/framework/typescript/consumption.rs b/apps/framework-cli/src/framework/typescript/consumption.rs index bb64f23df6..24df3a763f 100644 --- a/apps/framework-cli/src/framework/typescript/consumption.rs +++ b/apps/framework-cli/src/framework/typescript/consumption.rs @@ -5,11 +5,11 @@ use crate::framework::typescript::export_collectors::ExportCollectorError; use crate::infrastructure::olap::clickhouse::config::ClickHouseConfig; use crate::infrastructure::processes::consumption_registry::ConsumptionError; use crate::project::{JwtConfig, Project}; -use log::{debug, error, info}; use serde_json::{Map, Value}; use std::path::Path; use tokio::io::{AsyncBufReadExt, BufReader}; use tokio::process::Child; +use tracing::{debug, error, info}; use super::bin; diff --git a/apps/framework-cli/src/framework/typescript/export_collectors.rs b/apps/framework-cli/src/framework/typescript/export_collectors.rs index 12a7e8bcdc..9b5a3898f2 100644 --- 
a/apps/framework-cli/src/framework/typescript/export_collectors.rs +++ b/apps/framework-cli/src/framework/typescript/export_collectors.rs @@ -3,12 +3,12 @@ use crate::framework::consumption::model::ConsumptionQueryParam; use crate::framework::data_model::config::{ConfigIdentifier, DataModelConfig}; use crate::framework::typescript::consumption::{extract_intput_param, extract_schema}; use crate::project::Project; -use log::debug; use serde_json::Value; use std::collections::{HashMap, HashSet}; use std::path::Path; use tokio::io::AsyncReadExt; use tokio::process::Child; +use tracing::debug; const EXPORT_SERIALIZER_BIN: &str = "export-serializer"; const EXPORT_FUNC_TYPE_BIN: &str = "consumption-type-serializer"; diff --git a/apps/framework-cli/src/framework/typescript/generate.rs b/apps/framework-cli/src/framework/typescript/generate.rs index 7e9e556260..55b688b44c 100644 --- a/apps/framework-cli/src/framework/typescript/generate.rs +++ b/apps/framework-cli/src/framework/typescript/generate.rs @@ -12,6 +12,7 @@ use std::fmt::Write; // Use shared, language-agnostic sanitization (underscores) from utilities use crate::infrastructure::olap::clickhouse::extract_version_from_table_name; +use crate::infrastructure::olap::clickhouse::queries::BufferEngine; pub use ident::sanitize_identifier; /// Map a string to a valid TypeScript PascalCase identifier (for types/classes/consts). @@ -548,18 +549,8 @@ pub fn tables_to_typescript(tables: &[Table], life_cycle: Option) -> // Generate model interfaces for table in tables { - let primary_key = table - .columns - .iter() - .filter_map(|column| { - if column.primary_key { - Some(column.name.to_string()) - } else { - None - } - }) - .collect::>(); - let can_use_key_wrapping = table.order_by.starts_with_fields(&primary_key); + // list_tables sets primary_key_expression to Some if Key wrapping is insufficient to represent the PK + let can_use_key_wrapping = table.primary_key_expression.is_none(); writeln!(output, "export interface {} {{", table.name).unwrap(); @@ -629,6 +620,7 @@ pub fn tables_to_typescript(tables: &[Table], life_cycle: Option) -> } OrderBy::SingleExpr(expr) => format!("orderByExpression: {:?}", expr), }; + let var_name = sanitize_typescript_identifier(&table.name); let (base_name, version) = extract_version_from_table_name(&table.name); @@ -644,6 +636,11 @@ pub fn tables_to_typescript(tables: &[Table], life_cycle: Option) -> ) .unwrap(); writeln!(output, " {order_by_spec},").unwrap(); + + if let Some(ref pk_expr) = table.primary_key_expression { + // Use the explicit primary_key_expression directly + writeln!(output, " primaryKeyExpression: {:?},", pk_expr).unwrap(); + } if let Some(partition_by) = &table.partition_by { writeln!(output, " partitionBy: {:?},", partition_by).unwrap(); } @@ -653,184 +650,202 @@ pub fn tables_to_typescript(tables: &[Table], life_cycle: Option) -> if let Some(database) = &table.database { writeln!(output, " database: {:?},", database).unwrap(); } - if let Some(engine) = &table.engine { - match engine { - crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::S3Queue { - s3_path, - format, - compression, - headers, - aws_access_key_id, - aws_secret_access_key, - } => { - // For S3Queue, properties are at the same level as orderByFields - writeln!(output, " engine: ClickHouseEngines.S3Queue,").unwrap(); - writeln!(output, " s3Path: {:?},", s3_path).unwrap(); - writeln!(output, " format: {:?},", format).unwrap(); - if let Some(compression) = compression { - writeln!(output, " compression: {:?},", 
compression).unwrap(); - } - if let Some(key_id) = aws_access_key_id { - writeln!(output, " awsAccessKeyId: {:?},", key_id).unwrap(); - } - if let Some(secret) = aws_secret_access_key { - writeln!(output, " awsSecretAccessKey: {:?},", secret).unwrap(); - } - if let Some(headers) = headers { - write!(output, " headers: {{").unwrap(); - for (i, (key, value)) in headers.iter().enumerate() { - if i > 0 { write!(output, ",").unwrap(); } - write!(output, " {:?}: {:?}", key, value).unwrap(); - } - writeln!(output, " }},").unwrap(); - } + match &table.engine { + crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::S3Queue { + s3_path, + format, + compression, + headers, + aws_access_key_id, + aws_secret_access_key, + } => { + // For S3Queue, properties are at the same level as orderByFields + writeln!(output, " engine: ClickHouseEngines.S3Queue,").unwrap(); + writeln!(output, " s3Path: {:?},", s3_path).unwrap(); + writeln!(output, " format: {:?},", format).unwrap(); + if let Some(compression) = compression { + writeln!(output, " compression: {:?},", compression).unwrap(); } - crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::MergeTree => { - writeln!(output, " engine: ClickHouseEngines.MergeTree,").unwrap(); + if let Some(key_id) = aws_access_key_id { + writeln!(output, " awsAccessKeyId: {:?},", key_id).unwrap(); } - crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::ReplacingMergeTree { ver, is_deleted } => { - // Emit ReplacingMergeTree engine configuration - writeln!(output, " engine: ClickHouseEngines.ReplacingMergeTree,").unwrap(); - if let Some(ver_col) = ver { - writeln!(output, " ver: \"{}\",", ver_col).unwrap(); - } - if let Some(is_deleted_col) = is_deleted { - writeln!(output, " isDeleted: \"{}\",", is_deleted_col).unwrap(); + if let Some(secret) = aws_secret_access_key { + writeln!(output, " awsSecretAccessKey: {:?},", secret).unwrap(); + } + if let Some(headers) = headers { + write!(output, " headers: {{").unwrap(); + for (i, (key, value)) in headers.iter().enumerate() { + if i > 0 { write!(output, ",").unwrap(); } + write!(output, " {:?}: {:?}", key, value).unwrap(); } + writeln!(output, " }},").unwrap(); } - crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::AggregatingMergeTree => { - writeln!(output, " engine: ClickHouseEngines.AggregatingMergeTree,").unwrap(); + } + crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::MergeTree => { + writeln!(output, " engine: ClickHouseEngines.MergeTree,").unwrap(); + } + crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::ReplacingMergeTree { ver, is_deleted } => { + // Emit ReplacingMergeTree engine configuration + writeln!(output, " engine: ClickHouseEngines.ReplacingMergeTree,").unwrap(); + if let Some(ver_col) = ver { + writeln!(output, " ver: \"{}\",", ver_col).unwrap(); } - crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::SummingMergeTree { columns } => { - writeln!(output, " engine: ClickHouseEngines.SummingMergeTree,").unwrap(); - if let Some(cols) = columns { - if !cols.is_empty() { - let col_list = cols.iter().map(|c| format!("{:?}", c)).collect::>().join(", "); - writeln!(output, " columns: [{}],", col_list).unwrap(); - } - } + if let Some(is_deleted_col) = is_deleted { + writeln!(output, " isDeleted: \"{}\",", is_deleted_col).unwrap(); } - crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::ReplicatedMergeTree { keeper_path, replica_name } => { - writeln!(output, " engine: 
ClickHouseEngines.ReplicatedMergeTree,").unwrap(); - if let (Some(path), Some(name)) = (keeper_path, replica_name) { - writeln!(output, " keeperPath: {:?},", path).unwrap(); - writeln!(output, " replicaName: {:?},", name).unwrap(); + } + crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::AggregatingMergeTree => { + writeln!(output, " engine: ClickHouseEngines.AggregatingMergeTree,").unwrap(); + } + crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::SummingMergeTree { columns } => { + writeln!(output, " engine: ClickHouseEngines.SummingMergeTree,").unwrap(); + if let Some(cols) = columns { + if !cols.is_empty() { + let col_list = cols.iter().map(|c| format!("{:?}", c)).collect::>().join(", "); + writeln!(output, " columns: [{}],", col_list).unwrap(); } } - crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::ReplicatedReplacingMergeTree { keeper_path, replica_name, ver, is_deleted } => { - writeln!(output, " engine: ClickHouseEngines.ReplicatedReplacingMergeTree,").unwrap(); - if let (Some(path), Some(name)) = (keeper_path, replica_name) { - writeln!(output, " keeperPath: {:?},", path).unwrap(); - writeln!(output, " replicaName: {:?},", name).unwrap(); - } - if let Some(ver_col) = ver { - writeln!(output, " ver: {:?},", ver_col).unwrap(); - } - if let Some(is_deleted_col) = is_deleted { - writeln!(output, " isDeleted: {:?},", is_deleted_col).unwrap(); - } + } + crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::ReplicatedMergeTree { keeper_path, replica_name } => { + writeln!(output, " engine: ClickHouseEngines.ReplicatedMergeTree,").unwrap(); + if let (Some(path), Some(name)) = (keeper_path, replica_name) { + writeln!(output, " keeperPath: {:?},", path).unwrap(); + writeln!(output, " replicaName: {:?},", name).unwrap(); } - crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::ReplicatedAggregatingMergeTree { keeper_path, replica_name } => { - writeln!(output, " engine: ClickHouseEngines.ReplicatedAggregatingMergeTree,").unwrap(); - if let (Some(path), Some(name)) = (keeper_path, replica_name) { - writeln!(output, " keeperPath: {:?},", path).unwrap(); - writeln!(output, " replicaName: {:?},", name).unwrap(); - } + } + crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::ReplicatedReplacingMergeTree { keeper_path, replica_name, ver, is_deleted } => { + writeln!(output, " engine: ClickHouseEngines.ReplicatedReplacingMergeTree,").unwrap(); + if let (Some(path), Some(name)) = (keeper_path, replica_name) { + writeln!(output, " keeperPath: {:?},", path).unwrap(); + writeln!(output, " replicaName: {:?},", name).unwrap(); } - crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::ReplicatedSummingMergeTree { keeper_path, replica_name, columns } => { - writeln!(output, " engine: ClickHouseEngines.ReplicatedSummingMergeTree,").unwrap(); - if let (Some(path), Some(name)) = (keeper_path, replica_name) { - writeln!(output, " keeperPath: {:?},", path).unwrap(); - writeln!(output, " replicaName: {:?},", name).unwrap(); - } - if let Some(cols) = columns { - if !cols.is_empty() { - let col_list = cols.iter().map(|c| format!("{:?}", c)).collect::>().join(", "); - writeln!(output, " columns: [{}],", col_list).unwrap(); - } - } + if let Some(ver_col) = ver { + writeln!(output, " ver: {:?},", ver_col).unwrap(); } - crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::S3 { - path, - format, - aws_access_key_id, - aws_secret_access_key, - compression, - partition_strategy, - 
partition_columns_in_data_file, - } => { - writeln!(output, " engine: ClickHouseEngines.S3,").unwrap(); - writeln!(output, " path: {:?},", path).unwrap(); - writeln!(output, " format: {:?},", format).unwrap(); - if let Some(key_id) = aws_access_key_id { - writeln!(output, " awsAccessKeyId: {:?},", key_id).unwrap(); - } - if let Some(secret) = aws_secret_access_key { - writeln!(output, " awsSecretAccessKey: {:?},", secret).unwrap(); - } - if let Some(comp) = compression { - writeln!(output, " compression: {:?},", comp).unwrap(); - } - if let Some(ps) = partition_strategy { - writeln!(output, " partitionStrategy: {:?},", ps).unwrap(); - } - if let Some(pc) = partition_columns_in_data_file { - writeln!(output, " partitionColumnsInDataFile: {:?},", pc).unwrap(); - } + if let Some(is_deleted_col) = is_deleted { + writeln!(output, " isDeleted: {:?},", is_deleted_col).unwrap(); } - crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::Buffer { - target_database, - target_table, - num_layers, - min_time, - max_time, - min_rows, - max_rows, - min_bytes, - max_bytes, - flush_time, - flush_rows, - flush_bytes, - } => { - writeln!(output, " engine: ClickHouseEngines.Buffer,").unwrap(); - writeln!(output, " targetDatabase: {:?},", target_database).unwrap(); - writeln!(output, " targetTable: {:?},", target_table).unwrap(); - writeln!(output, " numLayers: {},", num_layers).unwrap(); - writeln!(output, " minTime: {},", min_time).unwrap(); - writeln!(output, " maxTime: {},", max_time).unwrap(); - writeln!(output, " minRows: {},", min_rows).unwrap(); - writeln!(output, " maxRows: {},", max_rows).unwrap(); - writeln!(output, " minBytes: {},", min_bytes).unwrap(); - writeln!(output, " maxBytes: {},", max_bytes).unwrap(); - if let Some(ft) = flush_time { - writeln!(output, " flushTime: {},", ft).unwrap(); - } - if let Some(fr) = flush_rows { - writeln!(output, " flushRows: {},", fr).unwrap(); - } - if let Some(fb) = flush_bytes { - writeln!(output, " flushBytes: {},", fb).unwrap(); - } + } + crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::ReplicatedAggregatingMergeTree { keeper_path, replica_name } => { + writeln!(output, " engine: ClickHouseEngines.ReplicatedAggregatingMergeTree,").unwrap(); + if let (Some(path), Some(name)) = (keeper_path, replica_name) { + writeln!(output, " keeperPath: {:?},", path).unwrap(); + writeln!(output, " replicaName: {:?},", name).unwrap(); } - crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::Distributed { - cluster, - target_database, - target_table, - sharding_key, - policy_name, - } => { - writeln!(output, " engine: ClickHouseEngines.Distributed,").unwrap(); - writeln!(output, " cluster: {:?},", cluster).unwrap(); - writeln!(output, " targetDatabase: {:?},", target_database).unwrap(); - writeln!(output, " targetTable: {:?},", target_table).unwrap(); - if let Some(key) = sharding_key { - writeln!(output, " shardingKey: {:?},", key).unwrap(); - } - if let Some(policy) = policy_name { - writeln!(output, " policyName: {:?},", policy).unwrap(); + } + crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::ReplicatedSummingMergeTree { keeper_path, replica_name, columns } => { + writeln!(output, " engine: ClickHouseEngines.ReplicatedSummingMergeTree,").unwrap(); + if let (Some(path), Some(name)) = (keeper_path, replica_name) { + writeln!(output, " keeperPath: {:?},", path).unwrap(); + writeln!(output, " replicaName: {:?},", name).unwrap(); + } + if let Some(cols) = columns { + if !cols.is_empty() { + let col_list = 
cols.iter().map(|c| format!("{:?}", c)).collect::>().join(", "); + writeln!(output, " columns: [{}],", col_list).unwrap(); } } } + crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::S3 { + path, + format, + aws_access_key_id, + aws_secret_access_key, + compression, + partition_strategy, + partition_columns_in_data_file, + } => { + writeln!(output, " engine: ClickHouseEngines.S3,").unwrap(); + writeln!(output, " path: {:?},", path).unwrap(); + writeln!(output, " format: {:?},", format).unwrap(); + if let Some(key_id) = aws_access_key_id { + writeln!(output, " awsAccessKeyId: {:?},", key_id).unwrap(); + } + if let Some(secret) = aws_secret_access_key { + writeln!(output, " awsSecretAccessKey: {:?},", secret).unwrap(); + } + if let Some(comp) = compression { + writeln!(output, " compression: {:?},", comp).unwrap(); + } + if let Some(ps) = partition_strategy { + writeln!(output, " partitionStrategy: {:?},", ps).unwrap(); + } + if let Some(pc) = partition_columns_in_data_file { + writeln!(output, " partitionColumnsInDataFile: {:?},", pc).unwrap(); + } + } + crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::Buffer(BufferEngine { + target_database, + target_table, + num_layers, + min_time, + max_time, + min_rows, + max_rows, + min_bytes, + max_bytes, + flush_time, + flush_rows, + flush_bytes, + }) => { + writeln!(output, " engine: ClickHouseEngines.Buffer,").unwrap(); + writeln!(output, " targetDatabase: {:?},", target_database).unwrap(); + writeln!(output, " targetTable: {:?},", target_table).unwrap(); + writeln!(output, " numLayers: {},", num_layers).unwrap(); + writeln!(output, " minTime: {},", min_time).unwrap(); + writeln!(output, " maxTime: {},", max_time).unwrap(); + writeln!(output, " minRows: {},", min_rows).unwrap(); + writeln!(output, " maxRows: {},", max_rows).unwrap(); + writeln!(output, " minBytes: {},", min_bytes).unwrap(); + writeln!(output, " maxBytes: {},", max_bytes).unwrap(); + if let Some(ft) = flush_time { + writeln!(output, " flushTime: {},", ft).unwrap(); + } + if let Some(fr) = flush_rows { + writeln!(output, " flushRows: {},", fr).unwrap(); + } + if let Some(fb) = flush_bytes { + writeln!(output, " flushBytes: {},", fb).unwrap(); + } + } + crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::Distributed { + cluster, + target_database, + target_table, + sharding_key, + policy_name, + } => { + writeln!(output, " engine: ClickHouseEngines.Distributed,").unwrap(); + writeln!(output, " cluster: {:?},", cluster).unwrap(); + writeln!(output, " targetDatabase: {:?},", target_database).unwrap(); + writeln!(output, " targetTable: {:?},", target_table).unwrap(); + if let Some(key) = sharding_key { + writeln!(output, " shardingKey: {:?},", key).unwrap(); + } + if let Some(policy) = policy_name { + writeln!(output, " policyName: {:?},", policy).unwrap(); + } + } + crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::IcebergS3 { + path, + format, + aws_access_key_id, + aws_secret_access_key, + compression, + } => { + writeln!(output, " engine: ClickHouseEngines.IcebergS3,").unwrap(); + writeln!(output, " path: {:?},", path).unwrap(); + writeln!(output, " format: {:?},", format).unwrap(); + if let Some(key_id) = aws_access_key_id { + writeln!(output, " awsAccessKeyId: {:?},", key_id).unwrap(); + } + if let Some(secret) = aws_secret_access_key { + writeln!(output, " awsSecretAccessKey: {:?},", secret).unwrap(); + } + if let Some(comp) = compression { + writeln!(output, " compression: {:?},", comp).unwrap(); + } + } } if 
let Some(version) = &table.version { writeln!(output, " version: {:?},", version).unwrap(); @@ -989,7 +1004,7 @@ mod tests { order_by: OrderBy::Fields(vec!["id".to_string()]), partition_by: None, sample_by: None, - engine: Some(ClickhouseEngine::MergeTree), + engine: ClickhouseEngine::MergeTree, version: None, source_primitive: PrimitiveSignature { name: "User".to_string(), @@ -1002,6 +1017,8 @@ mod tests { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, + primary_key_expression: None, }]; let result = tables_to_typescript(&tables, None); @@ -1059,14 +1076,14 @@ export const UserTable = new OlapTable("User", { order_by: OrderBy::Fields(vec!["id".to_string()]), sample_by: None, partition_by: None, - engine: Some(ClickhouseEngine::S3Queue { + engine: ClickhouseEngine::S3Queue { s3_path: "s3://bucket/path".to_string(), format: "JSONEachRow".to_string(), compression: Some("gzip".to_string()), headers: None, aws_access_key_id: None, aws_secret_access_key: None, - }), + }, version: None, source_primitive: PrimitiveSignature { name: "Events".to_string(), @@ -1083,6 +1100,8 @@ export const UserTable = new OlapTable("User", { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, + primary_key_expression: None, }]; let result = tables_to_typescript(&tables, None); @@ -1113,7 +1132,7 @@ export const UserTable = new OlapTable("User", { order_by: OrderBy::Fields(vec!["id".to_string()]), partition_by: None, sample_by: None, - engine: Some(ClickhouseEngine::MergeTree), + engine: ClickhouseEngine::MergeTree, version: None, source_primitive: PrimitiveSignature { name: "UserData".to_string(), @@ -1133,6 +1152,8 @@ export const UserTable = new OlapTable("User", { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, + primary_key_expression: None, }]; let result = tables_to_typescript(&tables, None); @@ -1186,10 +1207,10 @@ export const UserTable = new OlapTable("User", { order_by: OrderBy::Fields(vec!["id".to_string()]), partition_by: None, sample_by: None, - engine: Some(ClickhouseEngine::ReplacingMergeTree { + engine: ClickhouseEngine::ReplacingMergeTree { ver: Some("version".to_string()), is_deleted: Some("is_deleted".to_string()), - }), + }, version: None, source_primitive: PrimitiveSignature { name: "UserData".to_string(), @@ -1202,6 +1223,8 @@ export const UserTable = new OlapTable("User", { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, + primary_key_expression: None, }]; let result = tables_to_typescript(&tables, None); @@ -1230,10 +1253,10 @@ export const UserTable = new OlapTable("User", { sample_by: None, order_by: OrderBy::Fields(vec!["id".to_string()]), partition_by: None, - engine: Some(ClickhouseEngine::ReplicatedMergeTree { + engine: ClickhouseEngine::ReplicatedMergeTree { keeper_path: Some("/clickhouse/tables/{shard}/user_data".to_string()), replica_name: Some("{replica}".to_string()), - }), + }, version: None, source_primitive: PrimitiveSignature { name: "UserData".to_string(), @@ -1246,6 +1269,8 @@ export const UserTable = new OlapTable("User", { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, + primary_key_expression: None, }]; let result = tables_to_typescript(&tables, None); @@ -1304,12 +1329,12 @@ export const UserTable = new OlapTable("User", { sample_by: None, order_by: OrderBy::Fields(vec!["id".to_string()]), partition_by: None, - engine: Some(ClickhouseEngine::ReplicatedReplacingMergeTree { + engine: 
ClickhouseEngine::ReplicatedReplacingMergeTree { keeper_path: Some("/clickhouse/tables/{shard}/user_data".to_string()), replica_name: Some("{replica}".to_string()), ver: Some("version".to_string()), is_deleted: Some("is_deleted".to_string()), - }), + }, version: None, source_primitive: PrimitiveSignature { name: "UserData".to_string(), @@ -1322,6 +1347,8 @@ export const UserTable = new OlapTable("User", { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, + primary_key_expression: None, }]; let result = tables_to_typescript(&tables, None); @@ -1356,7 +1383,7 @@ export const UserTable = new OlapTable("User", { order_by: OrderBy::Fields(vec!["u64".to_string()]), partition_by: None, sample_by: None, - engine: Some(ClickhouseEngine::MergeTree), + engine: ClickhouseEngine::MergeTree, version: None, source_primitive: PrimitiveSignature { name: "IndexTest".to_string(), @@ -1384,6 +1411,8 @@ export const UserTable = new OlapTable("User", { ], database: None, table_ttl_setting: None, + cluster_name: None, + primary_key_expression: None, }]; let result = tables_to_typescript(&tables, None); @@ -1441,7 +1470,7 @@ export const UserTable = new OlapTable("User", { order_by: OrderBy::Fields(vec!["id".to_string()]), partition_by: None, sample_by: None, - engine: Some(ClickhouseEngine::MergeTree), + engine: ClickhouseEngine::MergeTree, version: None, source_primitive: PrimitiveSignature { name: "Task".to_string(), @@ -1454,6 +1483,8 @@ export const UserTable = new OlapTable("User", { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, + primary_key_expression: None, }]; let result = tables_to_typescript(&tables, None); @@ -1518,7 +1549,7 @@ export const TaskTable = new OlapTable("Task", { order_by: OrderBy::Fields(vec!["id".to_string(), "timestamp".to_string()]), partition_by: None, sample_by: None, - engine: Some(ClickhouseEngine::MergeTree), + engine: ClickhouseEngine::MergeTree, version: None, source_primitive: PrimitiveSignature { name: "Events".to_string(), @@ -1531,6 +1562,8 @@ export const TaskTable = new OlapTable("Task", { indexes: vec![], database: None, table_ttl_setting: Some("timestamp + INTERVAL 90 DAY DELETE".to_string()), + cluster_name: None, + primary_key_expression: None, }]; let result = tables_to_typescript(&tables, None); @@ -1585,7 +1618,7 @@ export const TaskTable = new OlapTable("Task", { order_by: OrderBy::Fields(vec!["id".to_string()]), partition_by: None, sample_by: None, - engine: Some(ClickhouseEngine::MergeTree), + engine: ClickhouseEngine::MergeTree, version: None, source_primitive: PrimitiveSignature { name: "JsonTest".to_string(), @@ -1597,6 +1630,8 @@ export const TaskTable = new OlapTable("Task", { table_settings: None, indexes: vec![], table_ttl_setting: None, + cluster_name: None, + primary_key_expression: None, }]; let result = tables_to_typescript(&tables, None); @@ -1631,7 +1666,7 @@ export const TaskTable = new OlapTable("Task", { order_by: OrderBy::Fields(vec!["id".to_string()]), partition_by: None, sample_by: None, - engine: Some(ClickhouseEngine::MergeTree), + engine: ClickhouseEngine::MergeTree, version: None, source_primitive: PrimitiveSignature { name: "ExternalData".to_string(), @@ -1644,6 +1679,8 @@ export const TaskTable = new OlapTable("Task", { indexes: vec![], database: Some("analytics_db".to_string()), table_ttl_setting: None, + cluster_name: None, + primary_key_expression: None, }]; let result = tables_to_typescript(&tables, None); diff --git 
a/apps/framework-cli/src/framework/typescript/parser.rs b/apps/framework-cli/src/framework/typescript/parser.rs index d6ec488171..16bc7495f8 100644 --- a/apps/framework-cli/src/framework/typescript/parser.rs +++ b/apps/framework-cli/src/framework/typescript/parser.rs @@ -38,7 +38,7 @@ pub async fn extract_data_model_from_file( let internal = project.internal_dir().unwrap(); let output_dir = internal.join("serialized_datamodels"); - log::info!("Extracting data model from file: {:?}", path); + tracing::info!("Extracting data model from file: {:?}", path); fs::write( internal.join(TSCONFIG_JSON), @@ -85,12 +85,12 @@ pub async fn extract_data_model_from_file( run_command_with_output_proxy(command, "TypeScript Compiler") .await .map_err(|err| { - log::error!("Error while running moose-tspc: {}", err); + tracing::error!("Error while running moose-tspc: {}", err); TypescriptParsingError::TypescriptCompilerError(Some(err)) })? }; - log::info!("Typescript compiler return code: {:?}", ts_return_code); + tracing::info!("Typescript compiler return code: {:?}", ts_return_code); if !ts_return_code.success() { return Err(TypescriptParsingError::TypescriptCompilerError(None)); diff --git a/apps/framework-cli/src/framework/typescript/scripts_worker.rs b/apps/framework-cli/src/framework/typescript/scripts_worker.rs index 9fcd4eed47..6a4c206e3e 100644 --- a/apps/framework-cli/src/framework/typescript/scripts_worker.rs +++ b/apps/framework-cli/src/framework/typescript/scripts_worker.rs @@ -1,9 +1,9 @@ use crate::cli::display::{show_message_wrapper, Message, MessageType}; use crate::project::{Project, ProjectFileError}; -use log::{debug, error, info, warn}; use tokio::io::{AsyncBufReadExt, BufReader}; use tokio::process::Child; +use tracing::{debug, error, info, warn}; use super::bin; diff --git a/apps/framework-cli/src/framework/typescript/streaming.rs b/apps/framework-cli/src/framework/typescript/streaming.rs index d76559a4cd..d8d7869551 100644 --- a/apps/framework-cli/src/framework/typescript/streaming.rs +++ b/apps/framework-cli/src/framework/typescript/streaming.rs @@ -1,7 +1,7 @@ -use log::{error, info}; use std::path::Path; use tokio::io::{AsyncBufReadExt, BufReader}; use tokio::process::Child; +use tracing::{error, info}; use super::bin; use crate::infrastructure::stream::kafka::models::KafkaConfig; diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse/client.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/client.rs index 10d3fff6a5..e6b270fcd1 100644 --- a/apps/framework-cli/src/infrastructure/olap/clickhouse/client.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/client.rs @@ -7,13 +7,13 @@ use hyper::body::Bytes; use hyper::{Request, Response, Uri}; use hyper_tls::HttpsConnector; use hyper_util::client::legacy::{connect::HttpConnector, Client}; -use log::debug; use tokio::time::{sleep, Duration}; +use tracing::debug; use super::config::ClickHouseConfig; use super::model::{wrap_and_join_column_names, ClickHouseRecord}; -use log::error; +use tracing::error; use async_trait::async_trait; @@ -152,7 +152,7 @@ impl ClickHouseClient { let body = Self::build_body(columns, records); - log::trace!("Inserting into clickhouse with values: {}", body); + tracing::trace!("Inserting into clickhouse with values: {}", body); let bytes = Bytes::from(body); diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse/config.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/config.rs index 521902cc01..2e97df1daf 100644 --- 
a/apps/framework-cli/src/infrastructure/olap/clickhouse/config.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/config.rs @@ -64,15 +64,46 @@ impl Default for ClickHouseConfig { } } +/// Result of parsing a ClickHouse connection string, including conversion metadata +#[derive(Debug, Clone)] +pub struct ParsedConnectionString { + pub config: ClickHouseConfig, + pub was_native_protocol: bool, + pub display_url: String, + pub database_was_explicit: bool, +} + /// Parses a ClickHouse connection string (URL) into a ClickHouseConfig /// /// Supports multiple URL schemes (https, clickhouse) and extracts database name from path or query parameter. /// Automatically determines SSL usage based on scheme and port. +/// Percent-decodes username and password for proper handling of special characters. pub fn parse_clickhouse_connection_string(conn_str: &str) -> anyhow::Result { + parse_clickhouse_connection_string_with_metadata(conn_str).map(|parsed| parsed.config) +} + +/// Parses a ClickHouse connection string with metadata about conversions performed +/// +/// Returns additional information useful for displaying user-facing messages, +/// such as whether native protocol conversion occurred and a display-safe URL. +pub fn parse_clickhouse_connection_string_with_metadata( + conn_str: &str, +) -> anyhow::Result { let url = Url::parse(conn_str)?; + let was_native_protocol = url.scheme() == "clickhouse"; - let user = url.username().to_string(); - let password = url.password().unwrap_or("").to_string(); + // Percent-decode username and password to handle special characters + let user = percent_encoding::percent_decode_str(url.username()) + .decode_utf8_lossy() + .to_string(); + let password = url + .password() + .map(|p| { + percent_encoding::percent_decode_str(p) + .decode_utf8_lossy() + .to_string() + }) + .unwrap_or_default(); let host = url.host_str().unwrap_or("localhost").to_string(); let mut http_port: Option = None; @@ -99,23 +130,39 @@ pub fn parse_clickhouse_connection_string(conn_str: &str) -> anyhow::Result (db, true), + None => ("default".to_string(), false), + } + }; + let config = ClickHouseConfig { - db_name, - user, - password, + db_name: db_name.clone(), + user: user.clone(), + password: password.clone(), use_ssl, - host, + host: host.clone(), host_port: http_port, native_port, host_data_path: None, @@ -123,7 +170,26 @@ pub fn parse_clickhouse_connection_string(conn_str: &str) -> anyhow::Result &str { - "ErrorStatsDiagnostic" - } - - fn applicable_to(&self, _component: &Component, _engine: Option<&ClickhouseEngine>) -> bool { - // Error stats are system-wide, not component-specific - // This should be run separately outside the component loop - false +/// +/// Use `ErrorStatsDiagnostic::new()` or `Default::default()` to construct. 
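// A usage sketch, written as a test, for the connection-string helpers added to
// config.rs above. It only exercises behaviour visible in this change: percent-decoded
// credentials, the `was_native_protocol` flag for `clickhouse://` URLs, and the
// fallback to the "default" database. Host, credentials, and database names are
// placeholders, and the port/SSL conversion rules are intentionally not asserted.
#[test]
fn parse_connection_string_sketch() -> anyhow::Result<()> {
    use crate::infrastructure::olap::clickhouse::config::parse_clickhouse_connection_string_with_metadata;

    // Native-protocol URL with a percent-encoded '@' in the password and an explicit database.
    let parsed = parse_clickhouse_connection_string_with_metadata(
        "clickhouse://bob:p%40ss@ch.example.com:9440/analytics",
    )?;
    assert!(parsed.was_native_protocol);
    assert!(parsed.database_was_explicit);
    assert_eq!(parsed.config.user, "bob");
    assert_eq!(parsed.config.password, "p@ss"); // "%40" decoded back to '@'
    assert_eq!(parsed.config.db_name, "analytics");

    // HTTPS URL without a database path: db_name falls back to "default",
    // and the metadata records that it was not explicit.
    let fallback = parse_clickhouse_connection_string_with_metadata("https://ch.example.com:8443")?;
    assert!(!fallback.was_native_protocol);
    assert!(!fallback.database_was_explicit);
    assert_eq!(fallback.config.db_name, "default");
    Ok(())
}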
+#[derive(Default)] +pub struct ErrorStatsDiagnostic(()); + +impl ErrorStatsDiagnostic { + /// Create a new ErrorStatsDiagnostic provider + pub const fn new() -> Self { + Self(()) } - fn is_system_wide(&self) -> bool { - true - } - - async fn diagnose( - &self, + /// Parse the ClickHouse JSON response and extract error statistics issues + /// + /// # Arguments + /// * `json_response` - The raw JSON string from ClickHouse + /// * `component` - The component being diagnosed (used for system-wide context) + /// + /// # Returns + /// Vector of issues found in the response + pub fn parse_response( + json_response: &str, component: &Component, - _engine: Option<&ClickhouseEngine>, - config: &ClickHouseConfig, - _since: Option<&str>, - ) -> Result, DiagnoseError> { - let client = ClickHouseClient::new(config) - .map_err(|e| DiagnoseError::ClickHouseConnection(format!("{}", e)))?; + ) -> Result, DiagnosticError> { + let json_value: Value = serde_json::from_str(json_response) + .map_err(|e| DiagnosticError::ParseError(format!("{}", e)))?; - // Get recent errors with significant counts - let query = "SELECT - name, - value, - last_error_time, - last_error_message - FROM system.errors - WHERE value > 0 - ORDER BY value DESC - LIMIT 10 - FORMAT JSON"; - - debug!("Executing errors query: {}", query); - - let result = tokio::time::timeout( - std::time::Duration::from_secs(DIAGNOSTIC_QUERY_TIMEOUT_SECS), - client.execute_sql(query), - ) - .await - .map_err(|_| DiagnoseError::QueryTimeout(DIAGNOSTIC_QUERY_TIMEOUT_SECS))? - .map_err(|e| DiagnoseError::QueryFailed(format!("{}", e)))?; - - let json_response: Value = serde_json::from_str(&result) - .map_err(|e| DiagnoseError::ParseError(format!("{}", e)))?; - - let data = json_response + let data = json_value .get("data") .and_then(|v| v.as_array()) .ok_or_else(|| { - DiagnoseError::ParseError("Missing 'data' field in response".to_string()) + DiagnosticError::ParseError("Missing 'data' field in response".to_string()) })?; let mut issues = Vec::new(); @@ -114,7 +87,10 @@ impl DiagnosticProvider for ErrorStatsDiagnostic { source: "system.errors".to_string(), component: component.clone(), error_type: "system_error".to_string(), - message: format!("Error '{}' occurred {} times. Last: {}", name, value, last_error_message), + message: format!( + "Error '{}' occurred {} times. Last: {}", + name, value, last_error_message + ), details, suggested_action: "Review error pattern and recent query logs. 
Check ClickHouse server logs for more details.".to_string(), related_queries: vec![ @@ -127,3 +103,55 @@ impl DiagnosticProvider for ErrorStatsDiagnostic { Ok(issues) } } + +#[async_trait::async_trait] +impl DiagnosticProvider for ErrorStatsDiagnostic { + fn name(&self) -> &str { + "ErrorStatsDiagnostic" + } + + fn applicable_to(&self, _component: &Component, _engine: Option<&ClickhouseEngine>) -> bool { + // Error stats are system-wide, not component-specific + // This should be run separately outside the component loop + false + } + + fn is_system_wide(&self) -> bool { + true + } + + async fn diagnose( + &self, + component: &Component, + _engine: Option<&ClickhouseEngine>, + config: &ClickHouseConfig, + _since: Option<&str>, + ) -> Result, DiagnosticError> { + let client = ClickHouseClient::new(config) + .map_err(|e| DiagnosticError::ConnectionFailed(format!("{}", e)))?; + + // Get recent errors with significant counts + let query = "SELECT + name, + value, + last_error_time, + last_error_message + FROM system.errors + WHERE value > 0 + ORDER BY value DESC + LIMIT 10 + FORMAT JSON"; + + debug!("Executing errors query: {}", query); + + let result = tokio::time::timeout( + std::time::Duration::from_secs(DIAGNOSTIC_QUERY_TIMEOUT_SECS), + client.execute_sql(query), + ) + .await + .map_err(|_| DiagnosticError::QueryTimeout(DIAGNOSTIC_QUERY_TIMEOUT_SECS))? + .map_err(|e| DiagnosticError::QueryFailed(format!("{}", e)))?; + + Self::parse_response(&result, component) + } +} diff --git a/apps/framework-cli/src/mcp/tools/infra_issues/merge_failures.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/diagnostics/merge_failures.rs similarity index 69% rename from apps/framework-cli/src/mcp/tools/infra_issues/merge_failures.rs rename to apps/framework-cli/src/infrastructure/olap/clickhouse/diagnostics/merge_failures.rs index e97e9d7485..79cda395c0 100644 --- a/apps/framework-cli/src/mcp/tools/infra_issues/merge_failures.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/diagnostics/merge_failures.rs @@ -1,9 +1,9 @@ //! Diagnostic provider for checking merge failures from system.metrics -use log::debug; use serde_json::{json, Map, Value}; +use tracing::debug; -use super::{Component, DiagnoseError, DiagnosticProvider, Issue, Severity}; +use super::{Component, DiagnosticError, DiagnosticProvider, Issue, Severity}; use crate::infrastructure::olap::clickhouse::client::ClickHouseClient; use crate::infrastructure::olap::clickhouse::config::ClickHouseConfig; use crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine; @@ -12,55 +12,35 @@ use crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine; const DIAGNOSTIC_QUERY_TIMEOUT_SECS: u64 = 30; /// Diagnostic provider for checking merge failures from system.metrics -pub struct MergeFailureDiagnostic; - -#[async_trait::async_trait] -impl DiagnosticProvider for MergeFailureDiagnostic { - fn name(&self) -> &str { - "merge_failures" - } - - fn applicable_to(&self, _component: &Component, _engine: Option<&ClickhouseEngine>) -> bool { - // Merge failures from system.metrics are system-wide, not component-specific - // This should be run separately outside the component loop - false - } - - fn is_system_wide(&self) -> bool { - true +/// +/// Use `MergeFailureDiagnostic::new()` or `Default::default()` to construct. 
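// A small shape sketch for the refactored providers above and below: each is now a
// zero-sized struct built with `new()` or `Default`, and the system-wide ones opt out
// of the per-component loop via `applicable_to`/`is_system_wide`. Note that the
// registry name is whatever `name()` returns, so the strings accepted in
// `diagnostic_names` differ in style ("ErrorStatsDiagnostic" vs "merge_failures").
#[test]
fn provider_shape_sketch() {
    use crate::infrastructure::olap::clickhouse::diagnostics::{
        Component, DiagnosticProvider, ErrorStatsDiagnostic, MergeFailureDiagnostic,
    };
    use std::collections::HashMap;

    let table = Component {
        component_type: "table".to_string(),
        name: "events".to_string(), // placeholder table name
        metadata: HashMap::new(),
    };

    let errors = ErrorStatsDiagnostic::new();
    assert_eq!(errors.name(), "ErrorStatsDiagnostic");
    assert!(errors.is_system_wide());
    assert!(!errors.applicable_to(&table, None)); // run once, not per table

    let merge_failures = MergeFailureDiagnostic::default();
    assert_eq!(merge_failures.name(), "merge_failures");
    assert!(merge_failures.is_system_wide());
}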
+#[derive(Default)] +pub struct MergeFailureDiagnostic(()); + +impl MergeFailureDiagnostic { + /// Create a new MergeFailureDiagnostic provider + pub const fn new() -> Self { + Self(()) } - async fn diagnose( - &self, + /// Parse the ClickHouse JSON response and extract merge failure issues + /// + /// # Arguments + /// * `json_response` - The raw JSON string from ClickHouse + /// * `component` - The component being diagnosed (used for system-wide context) + /// * `db_name` - Database name for generating related queries + /// + /// # Returns + /// Vector of issues found in the response + pub fn parse_response( + json_response: &str, component: &Component, - _engine: Option<&ClickhouseEngine>, - config: &ClickHouseConfig, - _since: Option<&str>, - ) -> Result, DiagnoseError> { - let client = ClickHouseClient::new(config) - .map_err(|e| DiagnoseError::ClickHouseConnection(format!("{}", e)))?; - - let mut issues = Vec::new(); - - // Check system.metrics for background merge failures - // Note: This is a system-wide metric, not per-table, but we report it per-table for context - let metrics_query = - "SELECT value FROM system.metrics WHERE metric = 'FailedBackgroundMerges' FORMAT JSON"; - - debug!("Executing merge failure metrics query: {}", metrics_query); + db_name: &str, + ) -> Result, DiagnosticError> { + let json_value: Value = serde_json::from_str(json_response) + .map_err(|e| DiagnosticError::ParseError(format!("{}", e)))?; - let result = tokio::time::timeout( - std::time::Duration::from_secs(DIAGNOSTIC_QUERY_TIMEOUT_SECS), - client.execute_sql(metrics_query), - ) - .await - .map_err(|_| DiagnoseError::QueryTimeout(DIAGNOSTIC_QUERY_TIMEOUT_SECS))? - .map_err(|e| DiagnoseError::QueryFailed(format!("{}", e)))?; - - let json_response: Value = serde_json::from_str(&result) - .map_err(|e| DiagnoseError::ParseError(format!("{}", e)))?; - - let failed_merges = json_response + let failed_merges = json_value .get("data") .and_then(|v| v.as_array()) .and_then(|arr| arr.first()) @@ -68,6 +48,8 @@ impl DiagnosticProvider for MergeFailureDiagnostic { .and_then(|v| v.as_u64()) .unwrap_or(0); + let mut issues = Vec::new(); + if failed_merges > 0 { let severity = if failed_merges > 10 { Severity::Error @@ -94,7 +76,7 @@ impl DiagnosticProvider for MergeFailureDiagnostic { "SELECT * FROM system.metrics WHERE metric LIKE '%Merge%'".to_string(), format!( "SELECT * FROM system.merges WHERE database = '{}' AND table = '{}'", - config.db_name, component.name + db_name, component.name ), ], }); @@ -103,3 +85,48 @@ impl DiagnosticProvider for MergeFailureDiagnostic { Ok(issues) } } + +#[async_trait::async_trait] +impl DiagnosticProvider for MergeFailureDiagnostic { + fn name(&self) -> &str { + "merge_failures" + } + + fn applicable_to(&self, _component: &Component, _engine: Option<&ClickhouseEngine>) -> bool { + // Merge failures from system.metrics are system-wide, not component-specific + // This should be run separately outside the component loop + false + } + + fn is_system_wide(&self) -> bool { + true + } + + async fn diagnose( + &self, + component: &Component, + _engine: Option<&ClickhouseEngine>, + config: &ClickHouseConfig, + _since: Option<&str>, + ) -> Result, DiagnosticError> { + let client = ClickHouseClient::new(config) + .map_err(|e| DiagnosticError::ConnectionFailed(format!("{}", e)))?; + + // Check system.metrics for background merge failures + // Note: This is a system-wide metric, not per-table, but we report it per-table for context + let metrics_query = + "SELECT value FROM system.metrics 
WHERE metric = 'FailedBackgroundMerges' FORMAT JSON"; + + debug!("Executing merge failure metrics query: {}", metrics_query); + + let result = tokio::time::timeout( + std::time::Duration::from_secs(DIAGNOSTIC_QUERY_TIMEOUT_SECS), + client.execute_sql(metrics_query), + ) + .await + .map_err(|_| DiagnosticError::QueryTimeout(DIAGNOSTIC_QUERY_TIMEOUT_SECS))? + .map_err(|e| DiagnosticError::QueryFailed(format!("{}", e)))?; + + Self::parse_response(&result, component, &config.db_name) + } +} diff --git a/apps/framework-cli/src/mcp/tools/infra_issues/merges.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/diagnostics/merges.rs similarity index 67% rename from apps/framework-cli/src/mcp/tools/infra_issues/merges.rs rename to apps/framework-cli/src/infrastructure/olap/clickhouse/diagnostics/merges.rs index 1d2176c3df..b5b9cf503c 100644 --- a/apps/framework-cli/src/mcp/tools/infra_issues/merges.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/diagnostics/merges.rs @@ -1,9 +1,9 @@ //! Diagnostic provider for checking stuck background merges -use log::debug; use serde_json::{json, Map, Value}; +use tracing::debug; -use super::{Component, DiagnoseError, DiagnosticProvider, Issue, Severity}; +use super::{Component, DiagnosticError, DiagnosticProvider, Issue, Severity}; use crate::infrastructure::olap::clickhouse::client::ClickHouseClient; use crate::infrastructure::olap::clickhouse::config::ClickHouseConfig; use crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine; @@ -12,62 +12,39 @@ use crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine; const DIAGNOSTIC_QUERY_TIMEOUT_SECS: u64 = 30; /// Diagnostic provider for checking stuck background merges -pub struct MergeDiagnostic; - -#[async_trait::async_trait] -impl DiagnosticProvider for MergeDiagnostic { - fn name(&self) -> &str { - "MergeDiagnostic" +/// +/// Use `MergeDiagnostic::new()` or `Default::default()` to construct. 
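// A test-style sketch showing the point of splitting `parse_response` out of
// `diagnose` in the merge-failure provider above: it can be fed a canned FORMAT JSON
// payload with no ClickHouse connection. The row shape is an assumption based on the
// `SELECT value FROM system.metrics WHERE metric = 'FailedBackgroundMerges'` query,
// the thresholds (>10 failures = Error, >0 = Warning) come from the module docs,
// and the database name is a placeholder.
#[test]
fn merge_failure_parse_response_sketch() {
    use crate::infrastructure::olap::clickhouse::diagnostics::{
        Component, MergeFailureDiagnostic, Severity,
    };
    use std::collections::HashMap;

    let component = Component {
        component_type: "table".to_string(),
        name: "events".to_string(),
        metadata: HashMap::new(),
    };

    // 25 failed background merges: above the error threshold, so one Error issue.
    let issues = MergeFailureDiagnostic::parse_response(
        r#"{"data":[{"value":25}]}"#,
        &component,
        "local",
    )
    .expect("payload should parse");
    assert_eq!(issues.len(), 1);
    assert_eq!(issues[0].severity, Severity::Error);

    // A zero counter reports nothing.
    let healthy = MergeFailureDiagnostic::parse_response(
        r#"{"data":[{"value":0}]}"#,
        &component,
        "local",
    )
    .expect("payload should parse");
    assert!(healthy.is_empty());
}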
+#[derive(Default)] +pub struct MergeDiagnostic(()); + +impl MergeDiagnostic { + /// Create a new MergeDiagnostic provider + pub const fn new() -> Self { + Self(()) } - fn applicable_to(&self, _component: &Component, _engine: Option<&ClickhouseEngine>) -> bool { - true - } - - async fn diagnose( - &self, + /// Parse the ClickHouse JSON response and extract merge issues + /// + /// # Arguments + /// * `json_response` - The raw JSON string from ClickHouse + /// * `component` - The component being diagnosed + /// * `db_name` - Database name for generating related queries + /// + /// # Returns + /// Vector of issues found in the response + pub fn parse_response( + json_response: &str, component: &Component, - _engine: Option<&ClickhouseEngine>, - config: &ClickHouseConfig, - _since: Option<&str>, - ) -> Result, DiagnoseError> { - let client = ClickHouseClient::new(config) - .map_err(|e| DiagnoseError::ClickHouseConnection(format!("{}", e)))?; + db_name: &str, + ) -> Result, DiagnosticError> { + let json_value: Value = serde_json::from_str(json_response) + .map_err(|e| DiagnosticError::ParseError(format!("{}", e)))?; - // Check for long-running merges - let query = format!( - "SELECT - elapsed, - progress, - num_parts, - result_part_name, - total_size_bytes_compressed - FROM system.merges - WHERE database = '{}' AND table = '{}' - AND elapsed > 300 - ORDER BY elapsed DESC - FORMAT JSON", - config.db_name, component.name - ); - - debug!("Executing merges query: {}", query); - - let result = tokio::time::timeout( - std::time::Duration::from_secs(DIAGNOSTIC_QUERY_TIMEOUT_SECS), - client.execute_sql(&query), - ) - .await - .map_err(|_| DiagnoseError::QueryTimeout(DIAGNOSTIC_QUERY_TIMEOUT_SECS))? - .map_err(|e| DiagnoseError::QueryFailed(format!("{}", e)))?; - - let json_response: Value = serde_json::from_str(&result) - .map_err(|e| DiagnoseError::ParseError(format!("{}", e)))?; - - let data = json_response + let data = json_value .get("data") .and_then(|v| v.as_array()) .ok_or_else(|| { - DiagnoseError::ParseError("Missing 'data' field in response".to_string()) + DiagnosticError::ParseError("Missing 'data' field in response".to_string()) })?; let mut issues = Vec::new(); @@ -102,16 +79,17 @@ impl DiagnosticProvider for MergeDiagnostic { error_type: "slow_merge".to_string(), message: format!( "Background merge running for {:.1} seconds ({:.1}% complete)", - elapsed, progress * 100.0 + elapsed, + progress * 100.0 ), details, suggested_action: "Monitor merge progress. If stuck, check server resources (CPU, disk I/O, memory). 
Consider stopping merge with SYSTEM STOP MERGES if necessary.".to_string(), related_queries: vec![ format!( "SELECT * FROM system.merges WHERE database = '{}' AND table = '{}'", - config.db_name, component.name + db_name, component.name ), - format!("SYSTEM STOP MERGES {}.{}", config.db_name, component.name), + format!("SYSTEM STOP MERGES {}.{}", db_name, component.name), ], }); } @@ -119,3 +97,53 @@ impl DiagnosticProvider for MergeDiagnostic { Ok(issues) } } + +#[async_trait::async_trait] +impl DiagnosticProvider for MergeDiagnostic { + fn name(&self) -> &str { + "MergeDiagnostic" + } + + fn applicable_to(&self, _component: &Component, _engine: Option<&ClickhouseEngine>) -> bool { + true + } + + async fn diagnose( + &self, + component: &Component, + _engine: Option<&ClickhouseEngine>, + config: &ClickHouseConfig, + _since: Option<&str>, + ) -> Result, DiagnosticError> { + let client = ClickHouseClient::new(config) + .map_err(|e| DiagnosticError::ConnectionFailed(format!("{}", e)))?; + + // Check for long-running merges + let query = format!( + "SELECT + elapsed, + progress, + num_parts, + result_part_name, + total_size_bytes_compressed + FROM system.merges + WHERE database = '{}' AND table = '{}' + AND elapsed > 300 + ORDER BY elapsed DESC + FORMAT JSON", + config.db_name, component.name + ); + + debug!("Executing merges query: {}", query); + + let result = tokio::time::timeout( + std::time::Duration::from_secs(DIAGNOSTIC_QUERY_TIMEOUT_SECS), + client.execute_sql(&query), + ) + .await + .map_err(|_| DiagnosticError::QueryTimeout(DIAGNOSTIC_QUERY_TIMEOUT_SECS))? + .map_err(|e| DiagnosticError::QueryFailed(format!("{}", e)))?; + + Self::parse_response(&result, component, &config.db_name) + } +} diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse/diagnostics/mod.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/diagnostics/mod.rs new file mode 100644 index 0000000000..b76f364d4b --- /dev/null +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/diagnostics/mod.rs @@ -0,0 +1,938 @@ +//! # ClickHouse Diagnostics Module +//! +//! This module provides reusable diagnostic capabilities for ClickHouse infrastructure. +//! It defines a provider-based architecture where each diagnostic check is implemented +//! as a separate provider that can be run independently or orchestrated together. +//! +//! ## Architecture +//! +//! Three-layer design: +//! 1. **Provider Layer** - Individual diagnostics with testable parsing logic +//! 2. **Orchestration Layer** - Running diagnostics with common request/filter structs +//! 3. **Consumer Layer** - Tools (MCP, CLI) that translate inputs to DiagnosticRequest +//! +//! ## Diagnostic Providers +//! +//! ### 1. MutationDiagnostic +//! Detects stuck or failing mutations (ALTER operations). +//! - **Source**: `system.mutations` +//! - **Thresholds**: Error (has failure reason), Warning (not done) +//! +//! ### 2. PartsDiagnostic +//! Identifies excessive data parts per partition. +//! - **Source**: `system.parts` +//! - **Thresholds**: Error (>300 parts), Warning (>100 parts) +//! +//! ### 3. MergeDiagnostic +//! Monitors long-running background merges. +//! - **Source**: `system.merges` +//! - **Thresholds**: Error (>1800s), Warning (>300s) +//! +//! ### 4. ErrorStatsDiagnostic +//! Aggregates errors from ClickHouse system.errors. +//! - **Source**: `system.errors` +//! - **Thresholds**: Error (>100), Warning (>10), Info (>0) +//! +//! ### 5. S3QueueDiagnostic (S3Queue tables only) +//! Detects S3Queue ingestion failures. +//! 
- **Source**: `system.s3queue_log` +//! - **Thresholds**: Error (any failed entries) +//! +//! ### 6. ReplicationDiagnostic (Replicated* tables only) +//! Monitors replication health and queue backlogs. +//! - **Sources**: `system.replication_queue`, `system.replicas` +//! - **Thresholds**: Error (queue>50, tries>10), Warning (queue>10, tries>3) +//! +//! ### 7. MergeFailureDiagnostic +//! Detects system-wide background merge failures. +//! - **Source**: `system.metrics` +//! - **Thresholds**: Error (>10 failures), Warning (>0 failures) +//! +//! ### 8. StoppedOperationsDiagnostic +//! Identifies manually stopped operations. +//! - **Sources**: `system.parts`, `system.merges`, `system.replicas` +//! - **Thresholds**: Error (stopped replication), Warning (stopped merges) + +use serde::{Deserialize, Serialize}; +use serde_json::{Map, Value}; +use std::collections::HashMap; + +use crate::infrastructure::olap::clickhouse::config::ClickHouseConfig; +use crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine; + +// Module declarations for diagnostic providers +mod errors; +mod merge_failures; +mod merges; +mod mutations; +mod parts; +mod replication; +mod s3queue; +mod stopped_operations; + +// Re-export diagnostic providers +pub use errors::ErrorStatsDiagnostic; +pub use merge_failures::MergeFailureDiagnostic; +pub use merges::MergeDiagnostic; +pub use mutations::MutationDiagnostic; +pub use parts::PartsDiagnostic; +pub use replication::ReplicationDiagnostic; +pub use s3queue::S3QueueDiagnostic; +pub use stopped_operations::StoppedOperationsDiagnostic; + +/// Error types for diagnostic operations +#[derive(Debug, thiserror::Error)] +pub enum DiagnosticError { + #[error("Failed to connect to ClickHouse: {0}")] + ConnectionFailed(String), + + #[error("Failed to execute diagnostic query: {0}")] + QueryFailed(String), + + #[error("Query timeout after {0} seconds")] + QueryTimeout(u64), + + #[error("Failed to parse query result: {0}")] + ParseError(String), + + #[error("Invalid parameter: {0}")] + InvalidParameter(String), +} + +/// Severity level for issues +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "lowercase")] +pub enum Severity { + Error, + Warning, + Info, +} + +impl Severity { + /// Check if this severity should include issues of the given level + pub fn includes(&self, other: &Severity) -> bool { + match self { + Severity::Info => true, // Info includes all severities + Severity::Warning => matches!(other, Severity::Warning | Severity::Error), + Severity::Error => matches!(other, Severity::Error), + } + } +} + +/// Component information for issue context +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Component { + pub component_type: String, + pub name: String, + /// Flexible metadata for component-specific context (e.g., database, namespace, cluster) + #[serde(skip_serializing_if = "HashMap::is_empty")] + pub metadata: HashMap, +} + +/// Detailed information about an infrastructure issue +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Issue { + pub severity: Severity, + pub source: String, + pub component: Component, + pub error_type: String, + pub message: String, + pub details: Map, + pub suggested_action: String, + pub related_queries: Vec, +} + +/// Options for filtering and configuring diagnostic runs +#[derive(Debug, Clone)] +pub struct DiagnosticOptions { + /// Specific diagnostic names to run (empty = run all applicable diagnostics) + pub diagnostic_names: Vec, + /// Minimum severity level to report 
(filters results) + pub min_severity: Severity, + /// Optional time filter (e.g., "-1h" for last hour) + pub since: Option, +} + +impl Default for DiagnosticOptions { + fn default() -> Self { + Self { + diagnostic_names: Vec::new(), + min_severity: Severity::Info, + since: None, + } + } +} + +/// Request to run diagnostics on components +#[derive(Debug, Clone)] +pub struct DiagnosticRequest { + /// Components to diagnose (tables, views, etc.) + pub components: Vec<(Component, ClickhouseEngine)>, + /// Diagnostic options for filtering and configuration + pub options: DiagnosticOptions, +} + +/// Summary statistics for diagnostic results +#[derive(Debug, Serialize, Deserialize)] +pub struct IssueSummary { + pub total_issues: usize, + pub by_severity: HashMap, + pub by_component: HashMap, +} + +/// Infrastructure type for diagnostic context +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "lowercase")] +pub enum InfrastructureType { + ClickHouse, +} + +/// Complete diagnostic output +#[derive(Debug, Serialize, Deserialize)] +pub struct DiagnosticOutput { + pub infrastructure_type: InfrastructureType, + pub issues: Vec, + pub summary: IssueSummary, +} + +impl DiagnosticOutput { + /// Create a new diagnostic output and compute summary statistics + pub fn new(infrastructure_type: InfrastructureType, issues: Vec) -> Self { + let mut by_severity = HashMap::new(); + let mut by_component = HashMap::new(); + + for issue in &issues { + let severity_key = format!("{:?}", issue.severity).to_lowercase(); + *by_severity.entry(severity_key).or_insert(0) += 1; + + let component_key = issue.component.name.clone(); + *by_component.entry(component_key).or_insert(0) += 1; + } + + let summary = IssueSummary { + total_issues: issues.len(), + by_severity, + by_component, + }; + + Self { + infrastructure_type, + issues, + summary, + } + } +} + +/// Trait for ClickHouse diagnostic providers +/// +/// Each provider implements checks for a specific aspect of ClickHouse infrastructure health. +/// Providers can be system-wide (run once) or component-specific (run per table/component). +#[async_trait::async_trait] +pub trait DiagnosticProvider: Send + Sync { + /// Name of this diagnostic provider + fn name(&self) -> &str; + + /// Check if this provider is applicable to the given component + fn applicable_to(&self, component: &Component, engine: Option<&ClickhouseEngine>) -> bool; + + /// Check if this provider is system-wide (not component-specific) + /// System-wide providers are run once, not per-component + fn is_system_wide(&self) -> bool { + false + } + + /// Run diagnostics and return list of issues found + async fn diagnose( + &self, + component: &Component, + engine: Option<&ClickhouseEngine>, + config: &ClickHouseConfig, + since: Option<&str>, + ) -> Result, DiagnosticError>; +} + +/// Create all available diagnostic providers +/// +/// Returns a vector containing instances of all diagnostic providers. +/// These can be filtered by name or applicability before running. 
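// Sketch of the narrowest consumer path through the registry helpers defined just
// below: look a single provider up by the name it reports, check applicability, and
// run it. The provider name is one of the real registry names; the table and engine
// values are supplied by the caller.
async fn run_single_diagnostic(
    config: &ClickHouseConfig,
    table: Component,
    engine: ClickhouseEngine,
) -> Result<Vec<Issue>, DiagnosticError> {
    match get_provider("MutationDiagnostic") {
        Some(provider) if provider.applicable_to(&table, Some(&engine)) => {
            provider.diagnose(&table, Some(&engine), config, None).await
        }
        _ => Ok(Vec::new()),
    }
}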
+pub fn create_all_providers() -> Vec> { + vec![ + Box::new(MutationDiagnostic::new()), + Box::new(PartsDiagnostic::new()), + Box::new(MergeDiagnostic::new()), + Box::new(ErrorStatsDiagnostic::new()), + Box::new(S3QueueDiagnostic::new()), + Box::new(ReplicationDiagnostic::new()), + Box::new(MergeFailureDiagnostic::new()), + Box::new(StoppedOperationsDiagnostic::new()), + ] +} + +/// Get a specific diagnostic provider by name +/// +/// # Arguments +/// * `name` - The name of the provider to retrieve +/// +/// # Returns +/// Some(provider) if found, None otherwise +pub fn get_provider(name: &str) -> Option> { + create_all_providers() + .into_iter() + .find(|p| p.name() == name) +} + +/// Run diagnostics on the provided components +/// +/// This is the main orchestration function that: +/// 1. Filters providers by diagnostic_names (empty = run all applicable) +/// 2. Separates system-wide vs component-specific providers +/// 3. Runs system-wide providers once +/// 4. Runs component-specific providers for each applicable component +/// 5. Filters results by minimum severity +/// 6. Returns aggregated results +/// +/// # Arguments +/// * `request` - The diagnostic request containing components and options +/// * `config` - ClickHouse configuration for database connection +/// +/// # Returns +/// DiagnosticOutput with all issues found, filtered by severity +pub async fn run_diagnostics( + request: DiagnosticRequest, + config: &ClickHouseConfig, +) -> Result { + use tokio::task::JoinSet; + + let all_providers = create_all_providers(); + + // Filter providers by requested diagnostic names (empty = all) + let providers: Vec> = if request.options.diagnostic_names.is_empty() + { + all_providers + } else { + // Validate that requested diagnostic names exist + let available_names: Vec = + all_providers.iter().map(|p| p.name().to_string()).collect(); + let invalid_names: Vec = request + .options + .diagnostic_names + .iter() + .filter(|name| !available_names.contains(name)) + .cloned() + .collect(); + + if !invalid_names.is_empty() { + return Err(DiagnosticError::InvalidParameter(format!( + "Unknown diagnostic names: {}. 
Available diagnostics: {}", + invalid_names.join(", "), + available_names.join(", ") + ))); + } + + all_providers + .into_iter() + .filter(|p| { + request + .options + .diagnostic_names + .contains(&p.name().to_string()) + }) + .collect() + }; + + // Separate system-wide from component-specific providers + let (system_wide, component_specific): (Vec<_>, Vec<_>) = + providers.into_iter().partition(|p| p.is_system_wide()); + + let mut join_set = JoinSet::new(); + let config = config.clone(); + let since = request.options.since.clone(); + + // Spawn system-wide providers as concurrent tasks (use first component for context) + if let Some((first_component, _)) = request.components.first() { + let first_component = first_component.clone(); + for provider in system_wide { + let config = config.clone(); + let component = first_component.clone(); + let since = since.clone(); + let provider_name = provider.name().to_string(); + + join_set.spawn(async move { + let result = provider + .diagnose(&component, None, &config, since.as_deref()) + .await; + + (provider_name, result) + }); + } + } + + // Spawn component-specific providers as concurrent tasks + // We need to collect (component, provider) pairs to spawn since we can't borrow provider + let mut tasks_to_spawn = Vec::new(); + + for (component, engine) in request.components { + for provider in &component_specific { + // Check if provider is applicable to this component + if !provider.applicable_to(&component, Some(&engine)) { + continue; + } + + tasks_to_spawn.push(( + component.clone(), + engine.clone(), + provider.name().to_string(), + )); + } + } + + // Now spawn tasks with recreated providers for each task + for (component, engine, provider_name) in tasks_to_spawn { + let config = config.clone(); + let since = since.clone(); + + // Get a fresh provider instance for this task + let provider = get_provider(&provider_name); + + join_set.spawn(async move { + let result = if let Some(provider) = provider { + provider + .diagnose(&component, Some(&engine), &config, since.as_deref()) + .await + } else { + // This shouldn't happen since we just got the name from a valid provider + Err(DiagnosticError::InvalidParameter(format!( + "Provider {} not found", + provider_name + ))) + }; + + (provider_name, result) + }); + } + + // Collect results as they complete + let mut all_issues = Vec::new(); + + while let Some(join_result) = join_set.join_next().await { + match join_result { + Ok((provider_name, diagnostic_result)) => match diagnostic_result { + Ok(issues) => all_issues.extend(issues), + Err(e) => { + // Log error but continue with other providers + tracing::warn!("Provider {} failed: {}", provider_name, e); + } + }, + Err(e) => { + // Task panicked or was cancelled + tracing::error!("Diagnostic task failed: {}", e); + } + } + } + + // Filter issues by minimum severity + let filtered_issues: Vec = all_issues + .into_iter() + .filter(|issue| request.options.min_severity.includes(&issue.severity)) + .collect(); + + Ok(DiagnosticOutput::new( + InfrastructureType::ClickHouse, + filtered_issues, + )) +} + +#[cfg(test)] +pub mod test_providers { + use super::*; + use serde_json::json; + + /// Mock diagnostic provider that returns predictable issues for testing + /// + /// This provider can be configured to return specific issues without requiring + /// a real ClickHouse connection, making it useful for testing the orchestration + /// layer and MCP integration. 
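// A consumer-layer sketch (the role the MCP tool and CLI play): translate a table into
// a Component, choose options, and hand everything to run_diagnostics. The table name,
// engine, and time window are placeholders, and `since` is currently ignored by the
// providers themselves.
async fn diagnose_project_table(config: &ClickHouseConfig) -> Result<(), DiagnosticError> {
    use std::collections::HashMap;

    let component = Component {
        component_type: "table".to_string(),
        name: "events".to_string(),
        metadata: HashMap::new(),
    };

    let request = DiagnosticRequest {
        components: vec![(component, ClickhouseEngine::default())],
        options: DiagnosticOptions {
            diagnostic_names: Vec::new(),    // empty = every applicable provider
            min_severity: Severity::Warning, // drop Info-level findings
            since: Some("-1h".to_string()),
        },
    };

    let output = run_diagnostics(request, config).await?;
    tracing::info!(
        "{} issue(s) across {} component(s)",
        output.summary.total_issues,
        output.summary.by_component.len()
    );
    for issue in &output.issues {
        tracing::warn!(
            "[{}] {}: {}",
            issue.component.name,
            issue.error_type,
            issue.message
        );
    }
    Ok(())
}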
+ pub struct MockDiagnostic { + pub name: String, + pub system_wide: bool, + pub issues_to_return: Vec, + } + + impl MockDiagnostic { + /// Create a mock that returns specific issues + pub fn with_issues(name: &str, issues: Vec) -> Self { + Self { + name: name.to_string(), + system_wide: false, + issues_to_return: issues, + } + } + + /// Create a mock that returns an error issue + pub fn with_error(component_name: &str) -> Self { + let mut details = Map::new(); + details.insert("test_field".to_string(), json!("test_value")); + details.insert("count".to_string(), json!(42)); + + Self::with_issues( + "mock_diagnostic", + vec![Issue { + severity: Severity::Error, + source: "mock_source".to_string(), + component: Component { + component_type: "table".to_string(), + name: component_name.to_string(), + metadata: HashMap::new(), + }, + error_type: "mock_error".to_string(), + message: format!("Test error for {}", component_name), + details, + suggested_action: "Fix the mock issue".to_string(), + related_queries: vec![ + format!("SELECT * FROM {}", component_name), + "SHOW CREATE TABLE".to_string(), + ], + }], + ) + } + + /// Create a mock that returns a warning issue + pub fn with_warning(component_name: &str) -> Self { + let mut details = Map::new(); + details.insert("threshold".to_string(), json!(100)); + + Self::with_issues( + "mock_warning", + vec![Issue { + severity: Severity::Warning, + source: "mock_source".to_string(), + component: Component { + component_type: "table".to_string(), + name: component_name.to_string(), + metadata: HashMap::new(), + }, + error_type: "mock_warning".to_string(), + message: format!("Test warning for {}", component_name), + details, + suggested_action: "Monitor the situation".to_string(), + related_queries: vec![], + }], + ) + } + + /// Create a mock that always succeeds with no issues + pub fn always_healthy() -> Self { + Self::with_issues("healthy_mock", vec![]) + } + + /// Create a system-wide mock provider + pub fn system_wide(name: &str, issues: Vec) -> Self { + Self { + name: name.to_string(), + system_wide: true, + issues_to_return: issues, + } + } + } + + #[async_trait::async_trait] + impl DiagnosticProvider for MockDiagnostic { + fn name(&self) -> &str { + &self.name + } + + fn applicable_to(&self, _: &Component, _: Option<&ClickhouseEngine>) -> bool { + true + } + + fn is_system_wide(&self) -> bool { + self.system_wide + } + + async fn diagnose( + &self, + _component: &Component, + _engine: Option<&ClickhouseEngine>, + _config: &ClickHouseConfig, + _since: Option<&str>, + ) -> Result, DiagnosticError> { + Ok(self.issues_to_return.clone()) + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[tokio::test] + async fn test_mock_diagnostic_with_error() { + let mock = test_providers::MockDiagnostic::with_error("test_table"); + let config = ClickHouseConfig { + host: "localhost".to_string(), + host_port: 8123, + native_port: 9000, + db_name: "test_db".to_string(), + use_ssl: false, + user: "default".to_string(), + password: "".to_string(), + host_data_path: None, + additional_databases: Vec::new(), + clusters: None, + }; + + let component = Component { + component_type: "table".to_string(), + name: "test_table".to_string(), + metadata: HashMap::new(), + }; + + let issues = mock + .diagnose(&component, None, &config, None) + .await + .unwrap(); + + assert_eq!(issues.len(), 1); + assert_eq!(issues[0].severity, Severity::Error); + assert_eq!(issues[0].error_type, "mock_error"); + assert_eq!(issues[0].component.name, "test_table"); + 
assert_eq!(issues[0].related_queries.len(), 2); + } + + #[tokio::test] + async fn test_mock_diagnostic_always_healthy() { + let mock = test_providers::MockDiagnostic::always_healthy(); + let config = ClickHouseConfig { + host: "localhost".to_string(), + host_port: 8123, + native_port: 9000, + db_name: "test_db".to_string(), + use_ssl: false, + user: "default".to_string(), + password: "".to_string(), + host_data_path: None, + additional_databases: Vec::new(), + clusters: None, + }; + + let component = Component { + component_type: "table".to_string(), + name: "test_table".to_string(), + metadata: HashMap::new(), + }; + + let issues = mock + .diagnose(&component, None, &config, None) + .await + .unwrap(); + assert_eq!(issues.len(), 0); + } + + #[test] + fn test_severity_includes() { + // Info includes all severities + assert!(Severity::Info.includes(&Severity::Error)); + assert!(Severity::Info.includes(&Severity::Warning)); + assert!(Severity::Info.includes(&Severity::Info)); + + // Warning includes warning and error + assert!(Severity::Warning.includes(&Severity::Error)); + assert!(Severity::Warning.includes(&Severity::Warning)); + assert!(!Severity::Warning.includes(&Severity::Info)); + + // Error includes only error + assert!(Severity::Error.includes(&Severity::Error)); + assert!(!Severity::Error.includes(&Severity::Warning)); + assert!(!Severity::Error.includes(&Severity::Info)); + } + + #[test] + fn test_severity_filtering() { + let mut details = Map::new(); + details.insert("level".to_string(), serde_json::json!("test")); + + let issues = [ + Issue { + severity: Severity::Error, + component: Component { + component_type: "table".to_string(), + name: "test".to_string(), + metadata: HashMap::new(), + }, + source: "test".to_string(), + error_type: "error_type".to_string(), + message: "Error".to_string(), + details: details.clone(), + suggested_action: "Fix".to_string(), + related_queries: vec![], + }, + Issue { + severity: Severity::Warning, + component: Component { + component_type: "table".to_string(), + name: "test".to_string(), + metadata: HashMap::new(), + }, + source: "test".to_string(), + error_type: "warning_type".to_string(), + message: "Warning".to_string(), + details: details.clone(), + suggested_action: "Check".to_string(), + related_queries: vec![], + }, + Issue { + severity: Severity::Info, + component: Component { + component_type: "table".to_string(), + name: "test".to_string(), + metadata: HashMap::new(), + }, + source: "test".to_string(), + error_type: "info_type".to_string(), + message: "Info".to_string(), + details, + suggested_action: "Note".to_string(), + related_queries: vec![], + }, + ]; + + // Filter for errors only + let filtered: Vec<_> = issues + .iter() + .filter(|i| Severity::Error.includes(&i.severity)) + .collect(); + + assert_eq!(filtered.len(), 1); + assert_eq!(filtered[0].severity, Severity::Error); + + // Filter for warnings and above + let filtered: Vec<_> = issues + .iter() + .filter(|i| Severity::Warning.includes(&i.severity)) + .collect(); + + assert_eq!(filtered.len(), 2); + + // Filter for all (info and above) + let filtered: Vec<_> = issues + .iter() + .filter(|i| Severity::Info.includes(&i.severity)) + .collect(); + + assert_eq!(filtered.len(), 3); + } + + #[test] + fn test_diagnostic_output_summary() { + let issues = vec![ + Issue { + severity: Severity::Error, + source: "mutations".to_string(), + component: Component { + component_type: "table".to_string(), + name: "users".to_string(), + metadata: HashMap::new(), + }, + error_type: 
"stuck_mutation".to_string(), + message: "Mutation stuck".to_string(), + details: Map::new(), + suggested_action: "Fix".to_string(), + related_queries: vec![], + }, + Issue { + severity: Severity::Warning, + source: "parts".to_string(), + component: Component { + component_type: "table".to_string(), + name: "users".to_string(), + metadata: HashMap::new(), + }, + error_type: "too_many_parts".to_string(), + message: "Too many parts".to_string(), + details: Map::new(), + suggested_action: "Wait for merge".to_string(), + related_queries: vec![], + }, + Issue { + severity: Severity::Error, + source: "replication".to_string(), + component: Component { + component_type: "table".to_string(), + name: "events".to_string(), + metadata: HashMap::new(), + }, + error_type: "replication_lag".to_string(), + message: "Replication lagging".to_string(), + details: Map::new(), + suggested_action: "Check network".to_string(), + related_queries: vec![], + }, + ]; + + let output = DiagnosticOutput::new(InfrastructureType::ClickHouse, issues); + + assert_eq!(output.summary.total_issues, 3); + assert_eq!(output.summary.by_severity.get("error"), Some(&2)); + assert_eq!(output.summary.by_severity.get("warning"), Some(&1)); + assert_eq!(output.summary.by_component.get("users"), Some(&2)); + assert_eq!(output.summary.by_component.get("events"), Some(&1)); + } + + #[tokio::test] + async fn test_concurrent_diagnostics_execution() { + use std::sync::atomic::{AtomicU32, Ordering}; + use std::sync::Arc; + use tokio::time::{sleep, Duration}; + + // Mock provider that tracks execution order + struct ConcurrentTestProvider { + name: String, + delay_ms: u64, + execution_counter: Arc, + execution_order: Arc, + } + + #[async_trait::async_trait] + impl DiagnosticProvider for ConcurrentTestProvider { + fn name(&self) -> &str { + &self.name + } + + fn applicable_to(&self, _: &Component, _: Option<&ClickhouseEngine>) -> bool { + true + } + + async fn diagnose( + &self, + _: &Component, + _: Option<&ClickhouseEngine>, + _: &ClickHouseConfig, + _: Option<&str>, + ) -> Result, DiagnosticError> { + // Simulate work with delay + sleep(Duration::from_millis(self.delay_ms)).await; + + // Track when this provider finished (not when it started) + let order = self.execution_counter.fetch_add(1, Ordering::SeqCst); + self.execution_order.store(order, Ordering::SeqCst); + + Ok(vec![]) + } + } + + // Test that fast provider completes before slow provider + // This proves concurrent execution (vs serial which would have slow finish first) + let execution_counter = Arc::new(AtomicU32::new(0)); + let slow_order = Arc::new(AtomicU32::new(0)); + let fast_order = Arc::new(AtomicU32::new(0)); + + let config = ClickHouseConfig { + host: "localhost".to_string(), + host_port: 8123, + native_port: 9000, + db_name: "test_db".to_string(), + use_ssl: false, + user: "default".to_string(), + password: "".to_string(), + host_data_path: None, + additional_databases: Vec::new(), + clusters: None, + }; + + // Note: This test demonstrates the concurrent execution pattern, + // but can't actually test it without modifying run_diagnostics to accept custom providers. 
+ // The actual concurrency is tested via observing real-world behavior (fast diagnostics return quickly) + + // For now, just verify the mock providers work + let slow = ConcurrentTestProvider { + name: "slow".to_string(), + delay_ms: 100, + execution_counter: execution_counter.clone(), + execution_order: slow_order.clone(), + }; + + let fast = ConcurrentTestProvider { + name: "fast".to_string(), + delay_ms: 10, + execution_counter: execution_counter.clone(), + execution_order: fast_order.clone(), + }; + + let component = Component { + component_type: "table".to_string(), + name: "test".to_string(), + metadata: HashMap::new(), + }; + + // Run them serially to establish baseline + let _ = slow.diagnose(&component, None, &config, None).await; + let _ = fast.diagnose(&component, None, &config, None).await; + + // In serial execution: slow finishes first (order=0), fast second (order=1) + assert_eq!(slow_order.load(Ordering::SeqCst), 0); + assert_eq!(fast_order.load(Ordering::SeqCst), 1); + } + + #[tokio::test] + async fn test_invalid_diagnostic_names_return_error() { + let config = ClickHouseConfig { + db_name: "test".to_string(), + host: "localhost".to_string(), + host_port: 8123, + native_port: 9000, + ..Default::default() + }; + + let component = Component { + component_type: "table".to_string(), + name: "test_table".to_string(), + metadata: HashMap::new(), + }; + + // Test with invalid diagnostic name + let request = DiagnosticRequest { + components: vec![(component.clone(), ClickhouseEngine::default())], + options: DiagnosticOptions { + diagnostic_names: vec!["invalid_diagnostic".to_string()], + min_severity: Severity::Info, + since: None, + }, + }; + + let result = run_diagnostics(request, &config).await; + assert!(result.is_err()); + + if let Err(DiagnosticError::InvalidParameter(msg)) = result { + assert!(msg.contains("invalid_diagnostic")); + assert!(msg.contains("Available diagnostics:")); + } else { + panic!("Expected InvalidParameter error"); + } + + // Test with mix of valid and invalid names + let request = DiagnosticRequest { + components: vec![(component.clone(), ClickhouseEngine::default())], + options: DiagnosticOptions { + diagnostic_names: vec![ + "MutationDiagnostic".to_string(), // Valid name + "invalid_one".to_string(), + "invalid_two".to_string(), + ], + min_severity: Severity::Info, + since: None, + }, + }; + + let result = run_diagnostics(request, &config).await; + assert!(result.is_err()); + + if let Err(DiagnosticError::InvalidParameter(msg)) = result { + assert!(msg.contains("invalid_one")); + assert!(msg.contains("invalid_two")); + assert!(msg.contains("Unknown diagnostic names:")); + assert!(!msg.contains("MutationDiagnostic, invalid")); // Valid name not listed as invalid + } else { + panic!("Expected InvalidParameter error"); + } + } +} diff --git a/apps/framework-cli/src/mcp/tools/infra_issues/mutations.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/diagnostics/mutations.rs similarity index 75% rename from apps/framework-cli/src/mcp/tools/infra_issues/mutations.rs rename to apps/framework-cli/src/infrastructure/olap/clickhouse/diagnostics/mutations.rs index b7e34f14fe..d567c1cfee 100644 --- a/apps/framework-cli/src/mcp/tools/infra_issues/mutations.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/diagnostics/mutations.rs @@ -1,9 +1,9 @@ //! 
Diagnostic provider for checking stuck or failed mutations -use log::debug; use serde_json::{json, Map, Value}; +use tracing::debug; -use super::{Component, DiagnoseError, DiagnosticProvider, Issue, Severity}; +use super::{Component, DiagnosticError, DiagnosticProvider, Issue, Severity}; use crate::infrastructure::olap::clickhouse::client::ClickHouseClient; use crate::infrastructure::olap::clickhouse::config::ClickHouseConfig; use crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine; @@ -12,65 +12,39 @@ use crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine; const DIAGNOSTIC_QUERY_TIMEOUT_SECS: u64 = 30; /// Diagnostic provider for checking stuck or failed mutations -pub struct MutationDiagnostic; - -#[async_trait::async_trait] -impl DiagnosticProvider for MutationDiagnostic { - fn name(&self) -> &str { - "MutationDiagnostic" - } - - fn applicable_to(&self, _component: &Component, _engine: Option<&ClickhouseEngine>) -> bool { - // Mutations can occur on any table - true +/// +/// Use `MutationDiagnostic::new()` or `Default::default()` to construct. +#[derive(Default)] +pub struct MutationDiagnostic(()); + +impl MutationDiagnostic { + /// Create a new MutationDiagnostic provider + pub const fn new() -> Self { + Self(()) } - async fn diagnose( - &self, + /// Parse the ClickHouse JSON response and extract mutation issues + /// + /// # Arguments + /// * `json_response` - The raw JSON string from ClickHouse + /// * `component` - The component being diagnosed + /// * `db_name` - Database name for generating related queries + /// + /// # Returns + /// Vector of issues found in the response + pub fn parse_response( + json_response: &str, component: &Component, - _engine: Option<&ClickhouseEngine>, - config: &ClickHouseConfig, - _since: Option<&str>, - ) -> Result, DiagnoseError> { - let client = ClickHouseClient::new(config) - .map_err(|e| DiagnoseError::ClickHouseConnection(format!("{}", e)))?; + db_name: &str, + ) -> Result, DiagnosticError> { + let json_value: Value = serde_json::from_str(json_response) + .map_err(|e| DiagnosticError::ParseError(format!("{}", e)))?; - let query = format!( - "SELECT - mutation_id, - command, - create_time, - is_done, - latest_failed_part, - latest_fail_time, - latest_fail_reason - FROM system.mutations - WHERE database = '{}' AND table = '{}' - AND (is_done = 0 OR latest_fail_reason != '') - ORDER BY create_time DESC - FORMAT JSON", - config.db_name, component.name - ); - - debug!("Executing mutations query: {}", query); - - let result = tokio::time::timeout( - std::time::Duration::from_secs(DIAGNOSTIC_QUERY_TIMEOUT_SECS), - client.execute_sql(&query), - ) - .await - .map_err(|_| DiagnoseError::QueryTimeout(DIAGNOSTIC_QUERY_TIMEOUT_SECS))? 
- .map_err(|e| DiagnoseError::QueryFailed(format!("{}", e)))?; - - // Parse ClickHouse JSON response - let json_response: Value = serde_json::from_str(&result) - .map_err(|e| DiagnoseError::ParseError(format!("{}", e)))?; - - let data = json_response + let data = json_value .get("data") .and_then(|v| v.as_array()) .ok_or_else(|| { - DiagnoseError::ParseError("Missing 'data' field in response".to_string()) + DiagnosticError::ParseError("Missing 'data' field in response".to_string()) })?; let mut issues = Vec::new(); @@ -130,7 +104,7 @@ impl DiagnosticProvider for MutationDiagnostic { let related_queries = vec![ format!( "SELECT * FROM system.mutations WHERE database = '{}' AND table = '{}' AND mutation_id = '{}'", - config.db_name, component.name, mutation_id + db_name, component.name, mutation_id ), format!("KILL MUTATION WHERE mutation_id = '{}'", mutation_id), ]; @@ -150,3 +124,55 @@ impl DiagnosticProvider for MutationDiagnostic { Ok(issues) } } + +#[async_trait::async_trait] +impl DiagnosticProvider for MutationDiagnostic { + fn name(&self) -> &str { + "MutationDiagnostic" + } + + fn applicable_to(&self, _component: &Component, _engine: Option<&ClickhouseEngine>) -> bool { + // Mutations can occur on any table + true + } + + async fn diagnose( + &self, + component: &Component, + _engine: Option<&ClickhouseEngine>, + config: &ClickHouseConfig, + _since: Option<&str>, + ) -> Result, DiagnosticError> { + let client = ClickHouseClient::new(config) + .map_err(|e| DiagnosticError::ConnectionFailed(format!("{}", e)))?; + + let query = format!( + "SELECT + mutation_id, + command, + create_time, + is_done, + latest_failed_part, + latest_fail_time, + latest_fail_reason + FROM system.mutations + WHERE database = '{}' AND table = '{}' + AND (is_done = 0 OR latest_fail_reason != '') + ORDER BY create_time DESC + FORMAT JSON", + config.db_name, component.name + ); + + debug!("Executing mutations query: {}", query); + + let result = tokio::time::timeout( + std::time::Duration::from_secs(DIAGNOSTIC_QUERY_TIMEOUT_SECS), + client.execute_sql(&query), + ) + .await + .map_err(|_| DiagnosticError::QueryTimeout(DIAGNOSTIC_QUERY_TIMEOUT_SECS))? + .map_err(|e| DiagnosticError::QueryFailed(format!("{}", e)))?; + + Self::parse_response(&result, component, &config.db_name) + } +} diff --git a/apps/framework-cli/src/mcp/tools/infra_issues/parts.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/diagnostics/parts.rs similarity index 70% rename from apps/framework-cli/src/mcp/tools/infra_issues/parts.rs rename to apps/framework-cli/src/infrastructure/olap/clickhouse/diagnostics/parts.rs index 8e88e63c9a..2193363443 100644 --- a/apps/framework-cli/src/mcp/tools/infra_issues/parts.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/diagnostics/parts.rs @@ -1,9 +1,9 @@ //! 
Diagnostic provider for checking data parts issues -use log::debug; use serde_json::{json, Map, Value}; +use tracing::debug; -use super::{Component, DiagnoseError, DiagnosticProvider, Issue, Severity}; +use super::{Component, DiagnosticError, DiagnosticProvider, Issue, Severity}; use crate::infrastructure::olap::clickhouse::client::ClickHouseClient; use crate::infrastructure::olap::clickhouse::config::ClickHouseConfig; use crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine; @@ -12,63 +12,39 @@ use crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine; const DIAGNOSTIC_QUERY_TIMEOUT_SECS: u64 = 30; /// Diagnostic provider for checking data parts issues -pub struct PartsDiagnostic; - -#[async_trait::async_trait] -impl DiagnosticProvider for PartsDiagnostic { - fn name(&self) -> &str { - "PartsDiagnostic" +/// +/// Use `PartsDiagnostic::new()` or `Default::default()` to construct. +#[derive(Default)] +pub struct PartsDiagnostic(()); + +impl PartsDiagnostic { + /// Create a new PartsDiagnostic provider + pub const fn new() -> Self { + Self(()) } - fn applicable_to(&self, _component: &Component, _engine: Option<&ClickhouseEngine>) -> bool { - // Parts are relevant for all MergeTree tables - true - } - - async fn diagnose( - &self, + /// Parse the ClickHouse JSON response and extract parts issues + /// + /// # Arguments + /// * `json_response` - The raw JSON string from ClickHouse + /// * `component` - The component being diagnosed + /// * `db_name` - Database name for generating related queries + /// + /// # Returns + /// Vector of issues found in the response + pub fn parse_response( + json_response: &str, component: &Component, - _engine: Option<&ClickhouseEngine>, - config: &ClickHouseConfig, - _since: Option<&str>, - ) -> Result, DiagnoseError> { - let client = ClickHouseClient::new(config) - .map_err(|e| DiagnoseError::ClickHouseConnection(format!("{}", e)))?; - - // Check for excessive parts count per partition - let query = format!( - "SELECT - partition, - count() as part_count, - sum(rows) as total_rows, - sum(bytes_on_disk) as total_bytes - FROM system.parts - WHERE database = '{}' AND table = '{}' AND active = 1 - GROUP BY partition - HAVING part_count > 100 - ORDER BY part_count DESC - FORMAT JSON", - config.db_name, component.name - ); - - debug!("Executing parts query: {}", query); + db_name: &str, + ) -> Result, DiagnosticError> { + let json_value: Value = serde_json::from_str(json_response) + .map_err(|e| DiagnosticError::ParseError(format!("{}", e)))?; - let result = tokio::time::timeout( - std::time::Duration::from_secs(DIAGNOSTIC_QUERY_TIMEOUT_SECS), - client.execute_sql(&query), - ) - .await - .map_err(|_| DiagnoseError::QueryTimeout(DIAGNOSTIC_QUERY_TIMEOUT_SECS))? 
- .map_err(|e| DiagnoseError::QueryFailed(format!("{}", e)))?; - - let json_response: Value = serde_json::from_str(&result) - .map_err(|e| DiagnoseError::ParseError(format!("{}", e)))?; - - let data = json_response + let data = json_value .get("data") .and_then(|v| v.as_array()) .ok_or_else(|| { - DiagnoseError::ParseError("Missing 'data' field in response".to_string()) + DiagnosticError::ParseError("Missing 'data' field in response".to_string()) })?; let mut issues = Vec::new(); @@ -110,16 +86,16 @@ impl DiagnosticProvider for PartsDiagnostic { details, suggested_action: format!( "Run OPTIMIZE TABLE to merge parts: OPTIMIZE TABLE {}.{} PARTITION '{}'", - config.db_name, component.name, partition + db_name, component.name, partition ), related_queries: vec![ format!( "SELECT * FROM system.parts WHERE database = '{}' AND table = '{}' AND partition = '{}' AND active = 1", - config.db_name, component.name, partition + db_name, component.name, partition ), format!( "OPTIMIZE TABLE {}.{} PARTITION '{}'", - config.db_name, component.name, partition + db_name, component.name, partition ), ], }); @@ -128,3 +104,54 @@ impl DiagnosticProvider for PartsDiagnostic { Ok(issues) } } + +#[async_trait::async_trait] +impl DiagnosticProvider for PartsDiagnostic { + fn name(&self) -> &str { + "PartsDiagnostic" + } + + fn applicable_to(&self, _component: &Component, _engine: Option<&ClickhouseEngine>) -> bool { + // Parts are relevant for all MergeTree tables + true + } + + async fn diagnose( + &self, + component: &Component, + _engine: Option<&ClickhouseEngine>, + config: &ClickHouseConfig, + _since: Option<&str>, + ) -> Result, DiagnosticError> { + let client = ClickHouseClient::new(config) + .map_err(|e| DiagnosticError::ConnectionFailed(format!("{}", e)))?; + + // Check for excessive parts count per partition + let query = format!( + "SELECT + partition, + count() as part_count, + sum(rows) as total_rows, + sum(bytes_on_disk) as total_bytes + FROM system.parts + WHERE database = '{}' AND table = '{}' AND active = 1 + GROUP BY partition + HAVING part_count > 100 + ORDER BY part_count DESC + FORMAT JSON", + config.db_name, component.name + ); + + debug!("Executing parts query: {}", query); + + let result = tokio::time::timeout( + std::time::Duration::from_secs(DIAGNOSTIC_QUERY_TIMEOUT_SECS), + client.execute_sql(&query), + ) + .await + .map_err(|_| DiagnosticError::QueryTimeout(DIAGNOSTIC_QUERY_TIMEOUT_SECS))? + .map_err(|e| DiagnosticError::QueryFailed(format!("{}", e)))?; + + Self::parse_response(&result, component, &config.db_name) + } +} diff --git a/apps/framework-cli/src/mcp/tools/infra_issues/replication.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/diagnostics/replication.rs similarity index 77% rename from apps/framework-cli/src/mcp/tools/infra_issues/replication.rs rename to apps/framework-cli/src/infrastructure/olap/clickhouse/diagnostics/replication.rs index 7b6a0e203b..0d42c10f49 100644 --- a/apps/framework-cli/src/mcp/tools/infra_issues/replication.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/diagnostics/replication.rs @@ -1,9 +1,9 @@ //! 
Diagnostic provider for checking replication health -use log::debug; use serde_json::{json, Map, Value}; +use tracing::debug; -use super::{Component, DiagnoseError, DiagnosticProvider, Issue, Severity}; +use super::{Component, DiagnosticError, DiagnosticProvider, Issue, Severity}; use crate::infrastructure::olap::clickhouse::client::ClickHouseClient; use crate::infrastructure::olap::clickhouse::config::ClickHouseConfig; use crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine; @@ -12,63 +12,27 @@ use crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine; const DIAGNOSTIC_QUERY_TIMEOUT_SECS: u64 = 30; /// Diagnostic provider for checking replication health -pub struct ReplicationDiagnostic; - -#[async_trait::async_trait] -impl DiagnosticProvider for ReplicationDiagnostic { - fn name(&self) -> &str { - "ReplicationDiagnostic" - } - - fn applicable_to(&self, _component: &Component, engine: Option<&ClickhouseEngine>) -> bool { - // Only applicable to Replicated* tables - matches!( - engine, - Some(ClickhouseEngine::ReplicatedMergeTree { .. }) - | Some(ClickhouseEngine::ReplicatedReplacingMergeTree { .. }) - | Some(ClickhouseEngine::ReplicatedAggregatingMergeTree { .. }) - | Some(ClickhouseEngine::ReplicatedSummingMergeTree { .. }) - ) +/// +/// Use `ReplicationDiagnostic::new()` or `Default::default()` to construct. +#[derive(Default)] +pub struct ReplicationDiagnostic(()); + +impl ReplicationDiagnostic { + /// Create a new ReplicationDiagnostic provider + pub const fn new() -> Self { + Self(()) } - async fn diagnose( - &self, + /// Parse queue size response and extract backlog issues + pub fn parse_queue_size_response( + json_response: &str, component: &Component, - _engine: Option<&ClickhouseEngine>, - config: &ClickHouseConfig, - _since: Option<&str>, - ) -> Result, DiagnoseError> { - let client = ClickHouseClient::new(config) - .map_err(|e| DiagnoseError::ClickHouseConnection(format!("{}", e)))?; - - let mut issues = Vec::new(); - - // First check for large queue backlogs (indicates stopped or slow replication) - let queue_size_query = format!( - "SELECT count() as queue_size - FROM system.replication_queue - WHERE database = '{}' AND table = '{}' - FORMAT JSON", - config.db_name, component.name - ); - - debug!( - "Executing replication queue size query: {}", - queue_size_query - ); - - let queue_size_result = tokio::time::timeout( - std::time::Duration::from_secs(DIAGNOSTIC_QUERY_TIMEOUT_SECS), - client.execute_sql(&queue_size_query), - ) - .await - .map_err(|_| DiagnoseError::QueryTimeout(DIAGNOSTIC_QUERY_TIMEOUT_SECS))? 
- .map_err(|e| DiagnoseError::QueryFailed(format!("{}", e)))?; - - let queue_size_json: Value = serde_json::from_str(&queue_size_result) - .map_err(|e| DiagnoseError::ParseError(format!("{}", e)))?; + db_name: &str, + ) -> Result, DiagnosticError> { + let json_value: Value = serde_json::from_str(json_response) + .map_err(|e| DiagnosticError::ParseError(format!("{}", e)))?; - let queue_size = queue_size_json + let queue_size = json_value .get("data") .and_then(|v| v.as_array()) .and_then(|arr| arr.first()) @@ -76,6 +40,8 @@ impl DiagnosticProvider for ReplicationDiagnostic { .and_then(|v| v.as_u64()) .unwrap_or(0); + let mut issues = Vec::new(); + // Report large queue backlogs (potential stopped replication) if queue_size > 10 { let severity = if queue_size > 50 { @@ -101,54 +67,38 @@ impl DiagnosticProvider for ReplicationDiagnostic { related_queries: vec![ format!( "SELECT * FROM system.replication_queue WHERE database = '{}' AND table = '{}'", - config.db_name, component.name + db_name, component.name ), format!( "SELECT * FROM system.replicas WHERE database = '{}' AND table = '{}'", - config.db_name, component.name + db_name, component.name ), - format!("SYSTEM START REPLICATION QUEUES {}.{}", config.db_name, component.name), + format!("SYSTEM START REPLICATION QUEUES {}.{}", db_name, component.name), ], }); } - // Check replication queue for stuck entries (retries or exceptions) - let queue_query = format!( - "SELECT - type, - source_replica, - create_time, - num_tries, - last_exception - FROM system.replication_queue - WHERE database = '{}' AND table = '{}' - AND (num_tries > 3 OR last_exception != '') - ORDER BY create_time ASC - LIMIT 20 - FORMAT JSON", - config.db_name, component.name - ); - - debug!("Executing replication queue query: {}", queue_query); - - let result = tokio::time::timeout( - std::time::Duration::from_secs(DIAGNOSTIC_QUERY_TIMEOUT_SECS), - client.execute_sql(&queue_query), - ) - .await - .map_err(|_| DiagnoseError::QueryTimeout(DIAGNOSTIC_QUERY_TIMEOUT_SECS))? 
- .map_err(|e| DiagnoseError::QueryFailed(format!("{}", e)))?; + Ok(issues) + } - let json_response: Value = serde_json::from_str(&result) - .map_err(|e| DiagnoseError::ParseError(format!("{}", e)))?; + /// Parse replication queue entries and extract stuck entry issues + pub fn parse_queue_entries_response( + json_response: &str, + component: &Component, + db_name: &str, + ) -> Result, DiagnosticError> { + let json_value: Value = serde_json::from_str(json_response) + .map_err(|e| DiagnosticError::ParseError(format!("{}", e)))?; - let data = json_response + let data = json_value .get("data") .and_then(|v| v.as_array()) .ok_or_else(|| { - DiagnoseError::ParseError("Missing 'data' field in response".to_string()) + DiagnosticError::ParseError("Missing 'data' field in response".to_string()) })?; + let mut issues = Vec::new(); + for row in data { let entry_type = row .get("type") @@ -201,47 +151,31 @@ impl DiagnosticProvider for ReplicationDiagnostic { related_queries: vec![ format!( "SELECT * FROM system.replication_queue WHERE database = '{}' AND table = '{}'", - config.db_name, component.name + db_name, component.name ), format!( "SELECT * FROM system.replicas WHERE database = '{}' AND table = '{}'", - config.db_name, component.name + db_name, component.name ), ], }); } - // Also check replica health status - let replica_query = format!( - "SELECT - is_readonly, - is_session_expired, - future_parts, - parts_to_check, - queue_size, - inserts_in_queue, - merges_in_queue, - absolute_delay - FROM system.replicas - WHERE database = '{}' AND table = '{}' - FORMAT JSON", - config.db_name, component.name - ); - - debug!("Executing replicas query: {}", replica_query); + Ok(issues) + } - let replica_result = tokio::time::timeout( - std::time::Duration::from_secs(DIAGNOSTIC_QUERY_TIMEOUT_SECS), - client.execute_sql(&replica_query), - ) - .await - .map_err(|_| DiagnoseError::QueryTimeout(DIAGNOSTIC_QUERY_TIMEOUT_SECS))? 
- .map_err(|e| DiagnoseError::QueryFailed(format!("{}", e)))?; + /// Parse replica health status and extract health issues + pub fn parse_replica_health_response( + json_response: &str, + component: &Component, + db_name: &str, + ) -> Result, DiagnosticError> { + let json_value: Value = serde_json::from_str(json_response) + .map_err(|e| DiagnosticError::ParseError(format!("{}", e)))?; - let replica_json: Value = serde_json::from_str(&replica_result) - .map_err(|e| DiagnoseError::ParseError(format!("{}", e)))?; + let mut issues = Vec::new(); - if let Some(replica_data) = replica_json.get("data").and_then(|v| v.as_array()) { + if let Some(replica_data) = json_value.get("data").and_then(|v| v.as_array()) { for row in replica_data { let is_readonly = row.get("is_readonly").and_then(|v| v.as_u64()).unwrap_or(0); let is_session_expired = row @@ -310,9 +244,9 @@ impl DiagnosticProvider for ReplicationDiagnostic { related_queries: vec![ format!( "SELECT * FROM system.replicas WHERE database = '{}' AND table = '{}'", - config.db_name, component.name + db_name, component.name ), - format!("SYSTEM RESTART REPLICA {}.{}", config.db_name, component.name), + format!("SYSTEM RESTART REPLICA {}.{}", db_name, component.name), ], }); } @@ -322,3 +256,130 @@ impl DiagnosticProvider for ReplicationDiagnostic { Ok(issues) } } + +#[async_trait::async_trait] +impl DiagnosticProvider for ReplicationDiagnostic { + fn name(&self) -> &str { + "ReplicationDiagnostic" + } + + fn applicable_to(&self, _component: &Component, engine: Option<&ClickhouseEngine>) -> bool { + // Only applicable to Replicated* tables + matches!( + engine, + Some(ClickhouseEngine::ReplicatedMergeTree { .. }) + | Some(ClickhouseEngine::ReplicatedReplacingMergeTree { .. }) + | Some(ClickhouseEngine::ReplicatedAggregatingMergeTree { .. }) + | Some(ClickhouseEngine::ReplicatedSummingMergeTree { .. }) + ) + } + + async fn diagnose( + &self, + component: &Component, + _engine: Option<&ClickhouseEngine>, + config: &ClickHouseConfig, + _since: Option<&str>, + ) -> Result, DiagnosticError> { + let client = ClickHouseClient::new(config) + .map_err(|e| DiagnosticError::ConnectionFailed(format!("{}", e)))?; + + let mut issues = Vec::new(); + + // First check for large queue backlogs (indicates stopped or slow replication) + let queue_size_query = format!( + "SELECT count() as queue_size + FROM system.replication_queue + WHERE database = '{}' AND table = '{}' + FORMAT JSON", + config.db_name, component.name + ); + + debug!( + "Executing replication queue size query: {}", + queue_size_query + ); + + let queue_size_result = tokio::time::timeout( + std::time::Duration::from_secs(DIAGNOSTIC_QUERY_TIMEOUT_SECS), + client.execute_sql(&queue_size_query), + ) + .await + .map_err(|_| DiagnosticError::QueryTimeout(DIAGNOSTIC_QUERY_TIMEOUT_SECS))? 
+ .map_err(|e| DiagnosticError::QueryFailed(format!("{}", e)))?; + + issues.extend(Self::parse_queue_size_response( + &queue_size_result, + component, + &config.db_name, + )?); + + // Check replication queue for stuck entries (retries or exceptions) + let queue_query = format!( + "SELECT + type, + source_replica, + create_time, + num_tries, + last_exception + FROM system.replication_queue + WHERE database = '{}' AND table = '{}' + AND (num_tries > 3 OR last_exception != '') + ORDER BY create_time ASC + LIMIT 20 + FORMAT JSON", + config.db_name, component.name + ); + + debug!("Executing replication queue query: {}", queue_query); + + let result = tokio::time::timeout( + std::time::Duration::from_secs(DIAGNOSTIC_QUERY_TIMEOUT_SECS), + client.execute_sql(&queue_query), + ) + .await + .map_err(|_| DiagnosticError::QueryTimeout(DIAGNOSTIC_QUERY_TIMEOUT_SECS))? + .map_err(|e| DiagnosticError::QueryFailed(format!("{}", e)))?; + + issues.extend(Self::parse_queue_entries_response( + &result, + component, + &config.db_name, + )?); + + // Also check replica health status + let replica_query = format!( + "SELECT + is_readonly, + is_session_expired, + future_parts, + parts_to_check, + queue_size, + inserts_in_queue, + merges_in_queue, + absolute_delay + FROM system.replicas + WHERE database = '{}' AND table = '{}' + FORMAT JSON", + config.db_name, component.name + ); + + debug!("Executing replicas query: {}", replica_query); + + let replica_result = tokio::time::timeout( + std::time::Duration::from_secs(DIAGNOSTIC_QUERY_TIMEOUT_SECS), + client.execute_sql(&replica_query), + ) + .await + .map_err(|_| DiagnosticError::QueryTimeout(DIAGNOSTIC_QUERY_TIMEOUT_SECS))? + .map_err(|e| DiagnosticError::QueryFailed(format!("{}", e)))?; + + issues.extend(Self::parse_replica_health_response( + &replica_result, + component, + &config.db_name, + )?); + + Ok(issues) + } +} diff --git a/apps/framework-cli/src/mcp/tools/infra_issues/s3queue.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/diagnostics/s3queue.rs similarity index 71% rename from apps/framework-cli/src/mcp/tools/infra_issues/s3queue.rs rename to apps/framework-cli/src/infrastructure/olap/clickhouse/diagnostics/s3queue.rs index ea2d5ea0f9..bdc267c27b 100644 --- a/apps/framework-cli/src/mcp/tools/infra_issues/s3queue.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/diagnostics/s3queue.rs @@ -1,9 +1,9 @@ //! Diagnostic provider for checking S3Queue ingestion -use log::debug; use serde_json::{json, Map, Value}; +use tracing::debug; -use super::{Component, DiagnoseError, DiagnosticProvider, Issue, Severity}; +use super::{Component, DiagnosticError, DiagnosticProvider, Issue, Severity}; use crate::infrastructure::olap::clickhouse::client::ClickHouseClient; use crate::infrastructure::olap::clickhouse::config::ClickHouseConfig; use crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine; @@ -12,64 +12,39 @@ use crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine; const DIAGNOSTIC_QUERY_TIMEOUT_SECS: u64 = 30; /// Diagnostic provider for checking S3Queue ingestion -pub struct S3QueueDiagnostic; - -#[async_trait::async_trait] -impl DiagnosticProvider for S3QueueDiagnostic { - fn name(&self) -> &str { - "S3QueueDiagnostic" - } - - fn applicable_to(&self, _component: &Component, engine: Option<&ClickhouseEngine>) -> bool { - // Only applicable to S3Queue tables - matches!(engine, Some(ClickhouseEngine::S3Queue { .. })) +/// +/// Use `S3QueueDiagnostic::new()` or `Default::default()` to construct. 
+#[derive(Default)] +pub struct S3QueueDiagnostic(()); + +impl S3QueueDiagnostic { + /// Create a new S3QueueDiagnostic provider + pub const fn new() -> Self { + Self(()) } - async fn diagnose( - &self, + /// Parse the ClickHouse JSON response and extract S3Queue ingestion issues + /// + /// # Arguments + /// * `json_response` - The raw JSON string from ClickHouse + /// * `component` - The component being diagnosed + /// * `db_name` - Database name for generating related queries + /// + /// # Returns + /// Vector of issues found in the response + pub fn parse_response( + json_response: &str, component: &Component, - _engine: Option<&ClickhouseEngine>, - config: &ClickHouseConfig, - _since: Option<&str>, - ) -> Result, DiagnoseError> { - let client = ClickHouseClient::new(config) - .map_err(|e| DiagnoseError::ClickHouseConnection(format!("{}", e)))?; + db_name: &str, + ) -> Result, DiagnosticError> { + let json_value: Value = serde_json::from_str(json_response) + .map_err(|e| DiagnosticError::ParseError(format!("{}", e)))?; - // Check for S3Queue ingestion errors - let query = format!( - "SELECT - file_name, - status, - processing_start_time, - processing_end_time, - exception - FROM system.s3queue_log - WHERE database = '{}' AND table = '{}' - AND status IN ('Failed', 'ProcessingFailed') - ORDER BY processing_start_time DESC - LIMIT 20 - FORMAT JSON", - config.db_name, component.name - ); - - debug!("Executing S3Queue query: {}", query); - - let result = tokio::time::timeout( - std::time::Duration::from_secs(DIAGNOSTIC_QUERY_TIMEOUT_SECS), - client.execute_sql(&query), - ) - .await - .map_err(|_| DiagnoseError::QueryTimeout(DIAGNOSTIC_QUERY_TIMEOUT_SECS))? - .map_err(|e| DiagnoseError::QueryFailed(format!("{}", e)))?; - - let json_response: Value = serde_json::from_str(&result) - .map_err(|e| DiagnoseError::ParseError(format!("{}", e)))?; - - let data = json_response + let data = json_value .get("data") .and_then(|v| v.as_array()) .ok_or_else(|| { - DiagnoseError::ParseError("Missing 'data' field in response".to_string()) + DiagnosticError::ParseError("Missing 'data' field in response".to_string()) })?; let mut issues = Vec::new(); @@ -109,11 +84,11 @@ impl DiagnosticProvider for S3QueueDiagnostic { related_queries: vec![ format!( "SELECT * FROM system.s3queue_log WHERE database = '{}' AND table = '{}' ORDER BY processing_start_time DESC LIMIT 50", - config.db_name, component.name + db_name, component.name ), format!( "SELECT * FROM system.s3queue WHERE database = '{}' AND table = '{}'", - config.db_name, component.name + db_name, component.name ), ], }); @@ -122,3 +97,55 @@ impl DiagnosticProvider for S3QueueDiagnostic { Ok(issues) } } + +#[async_trait::async_trait] +impl DiagnosticProvider for S3QueueDiagnostic { + fn name(&self) -> &str { + "S3QueueDiagnostic" + } + + fn applicable_to(&self, _component: &Component, engine: Option<&ClickhouseEngine>) -> bool { + // Only applicable to S3Queue tables + matches!(engine, Some(ClickhouseEngine::S3Queue { .. 
})) + } + + async fn diagnose( + &self, + component: &Component, + _engine: Option<&ClickhouseEngine>, + config: &ClickHouseConfig, + _since: Option<&str>, + ) -> Result, DiagnosticError> { + let client = ClickHouseClient::new(config) + .map_err(|e| DiagnosticError::ConnectionFailed(format!("{}", e)))?; + + // Check for S3Queue ingestion errors + let query = format!( + "SELECT + file_name, + status, + processing_start_time, + processing_end_time, + exception + FROM system.s3queue_log + WHERE database = '{}' AND table = '{}' + AND status IN ('Failed', 'ProcessingFailed') + ORDER BY processing_start_time DESC + LIMIT 20 + FORMAT JSON", + config.db_name, component.name + ); + + debug!("Executing S3Queue query: {}", query); + + let result = tokio::time::timeout( + std::time::Duration::from_secs(DIAGNOSTIC_QUERY_TIMEOUT_SECS), + client.execute_sql(&query), + ) + .await + .map_err(|_| DiagnosticError::QueryTimeout(DIAGNOSTIC_QUERY_TIMEOUT_SECS))? + .map_err(|e| DiagnosticError::QueryFailed(format!("{}", e)))?; + + Self::parse_response(&result, component, &config.db_name) + } +} diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse/diagnostics/stopped_operations.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/diagnostics/stopped_operations.rs new file mode 100644 index 0000000000..6523be76d9 --- /dev/null +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/diagnostics/stopped_operations.rs @@ -0,0 +1,273 @@ +//! Diagnostic provider for checking stopped operations (merges, replication) + +use serde_json::{json, Map, Value}; +use tracing::debug; + +use super::{Component, DiagnosticError, DiagnosticProvider, Issue, Severity}; +use crate::infrastructure::olap::clickhouse::client::ClickHouseClient; +use crate::infrastructure::olap::clickhouse::config::ClickHouseConfig; +use crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine; + +/// Query timeout for diagnostic checks (30 seconds) +const DIAGNOSTIC_QUERY_TIMEOUT_SECS: u64 = 30; + +/// Diagnostic provider for checking stopped operations (merges, replication) +/// +/// Use `StoppedOperationsDiagnostic::new()` or `Default::default()` to construct. 
+#[derive(Default)] +pub struct StoppedOperationsDiagnostic(()); + +impl StoppedOperationsDiagnostic { + /// Create a new StoppedOperationsDiagnostic provider + pub const fn new() -> Self { + Self(()) + } + + /// Parse parts count and merge count to detect stopped merges + /// + /// # Arguments + /// * `parts_json_response` - JSON response from parts count query + /// * `merges_json_response` - JSON response from merges count query + /// * `component` - The component being diagnosed + /// * `db_name` - Database name for generating related queries + /// + /// # Returns + /// Vector of issues if merges appear to be stopped + pub fn parse_stopped_merges_response( + parts_json_response: &str, + merges_json_response: &str, + component: &Component, + db_name: &str, + ) -> Result, DiagnosticError> { + let parts_json: Value = serde_json::from_str(parts_json_response) + .map_err(|e| DiagnosticError::ParseError(format!("{}", e)))?; + + let parts_count = parts_json + .get("data") + .and_then(|v| v.as_array()) + .and_then(|arr| arr.first()) + .and_then(|row| row.get("part_count")) + .and_then(|v| v.as_u64()) + .unwrap_or(0); + + let mut issues = Vec::new(); + + // If we have many parts, check if merges are running + if parts_count > 100 { + let merges_json: Value = serde_json::from_str(merges_json_response) + .map_err(|e| DiagnosticError::ParseError(format!("{}", e)))?; + + let merge_count = merges_json + .get("data") + .and_then(|v| v.as_array()) + .and_then(|arr| arr.first()) + .and_then(|row| row.get("merge_count")) + .and_then(|v| v.as_u64()) + .unwrap_or(0); + + // If we have excessive parts but no merges running, merges might be stopped + if merge_count == 0 { + let mut details = Map::new(); + details.insert("part_count".to_string(), json!(parts_count)); + details.insert("active_merges".to_string(), json!(0)); + + issues.push(Issue { + severity: Severity::Warning, + source: "system.parts,system.merges".to_string(), + component: component.clone(), + error_type: "merges_possibly_stopped".to_string(), + message: format!( + "Table has {} active parts but no running merges. Merges may be stopped or throttled.", + parts_count + ), + details, + suggested_action: format!( + "Check if merges were manually stopped with 'SELECT * FROM system.settings WHERE name LIKE \"%merge%\"'. 
Start merges if needed: 'SYSTEM START MERGES {}.{}'", + db_name, component.name + ), + related_queries: vec![ + format!( + "SELECT * FROM system.parts WHERE database = '{}' AND table = '{}' AND active = 1 ORDER BY modification_time DESC LIMIT 20", + db_name, component.name + ), + format!( + "SYSTEM START MERGES {}.{}", + db_name, component.name + ), + ], + }); + } + } + + Ok(issues) + } + + /// Parse replica status to detect stopped replication + /// + /// # Arguments + /// * `json_response` - JSON response from replicas query + /// * `component` - The component being diagnosed + /// * `db_name` - Database name for generating related queries + /// + /// # Returns + /// Vector of issues if replication appears to be stopped + pub fn parse_stopped_replication_response( + json_response: &str, + component: &Component, + db_name: &str, + ) -> Result, DiagnosticError> { + let replicas_json: Value = serde_json::from_str(json_response) + .map_err(|e| DiagnosticError::ParseError(format!("{}", e)))?; + + let mut issues = Vec::new(); + + if let Some(replica_data) = replicas_json.get("data").and_then(|v| v.as_array()) { + for row in replica_data { + let is_readonly = row.get("is_readonly").and_then(|v| v.as_u64()).unwrap_or(0); + let queue_size = row.get("queue_size").and_then(|v| v.as_u64()).unwrap_or(0); + + // If replica is readonly with items in queue, replication might be stopped + if is_readonly == 1 && queue_size > 0 { + let mut details = Map::new(); + details.insert("is_readonly".to_string(), json!(true)); + details.insert("queue_size".to_string(), json!(queue_size)); + + issues.push(Issue { + severity: Severity::Error, + source: "system.replicas".to_string(), + component: component.clone(), + error_type: "replication_stopped".to_string(), + message: format!( + "Replica is in read-only mode with {} items in queue. Replication may be stopped.", + queue_size + ), + details, + suggested_action: format!( + "Investigate why replica is read-only. 
Try restarting replication: 'SYSTEM START REPLICATION QUEUES {}.{}'", + db_name, component.name + ), + related_queries: vec![ + format!( + "SELECT * FROM system.replicas WHERE database = '{}' AND table = '{}'", + db_name, component.name + ), + format!( + "SYSTEM START REPLICATION QUEUES {}.{}", + db_name, component.name + ), + ], + }); + } + } + } + + Ok(issues) + } +} + +#[async_trait::async_trait] +impl DiagnosticProvider for StoppedOperationsDiagnostic { + fn name(&self) -> &str { + "stopped_operations" + } + + fn applicable_to(&self, _component: &Component, _engine: Option<&ClickhouseEngine>) -> bool { + // Applicable to all tables - we check both merges and replication + true + } + + async fn diagnose( + &self, + component: &Component, + engine: Option<&ClickhouseEngine>, + config: &ClickHouseConfig, + _since: Option<&str>, + ) -> Result, DiagnosticError> { + let client = ClickHouseClient::new(config) + .map_err(|e| DiagnosticError::ConnectionFailed(format!("{}", e)))?; + + let mut issues = Vec::new(); + + // Check if merges are stopped for this table + // We can detect this by checking if there are no running merges but many parts + let parts_count_query = format!( + "SELECT count() as part_count + FROM system.parts + WHERE database = '{}' AND table = '{}' AND active = 1 + FORMAT JSON", + config.db_name, component.name + ); + + debug!("Executing parts count query: {}", parts_count_query); + + let parts_result = tokio::time::timeout( + std::time::Duration::from_secs(DIAGNOSTIC_QUERY_TIMEOUT_SECS), + client.execute_sql(&parts_count_query), + ) + .await + .map_err(|_| DiagnosticError::QueryTimeout(DIAGNOSTIC_QUERY_TIMEOUT_SECS))? + .map_err(|e| DiagnosticError::QueryFailed(format!("{}", e)))?; + + let merges_query = format!( + "SELECT count() as merge_count + FROM system.merges + WHERE database = '{}' AND table = '{}' + FORMAT JSON", + config.db_name, component.name + ); + + debug!("Executing merges query: {}", merges_query); + + let merges_result = tokio::time::timeout( + std::time::Duration::from_secs(DIAGNOSTIC_QUERY_TIMEOUT_SECS), + client.execute_sql(&merges_query), + ) + .await + .map_err(|_| DiagnosticError::QueryTimeout(DIAGNOSTIC_QUERY_TIMEOUT_SECS))? + .map_err(|e| DiagnosticError::QueryFailed(format!("{}", e)))?; + + issues.extend(Self::parse_stopped_merges_response( + &parts_result, + &merges_result, + component, + &config.db_name, + )?); + + // For replicated tables, check if replication queues are stopped + let is_replicated = matches!( + engine, + Some(ClickhouseEngine::ReplicatedMergeTree { .. }) + | Some(ClickhouseEngine::ReplicatedReplacingMergeTree { .. }) + | Some(ClickhouseEngine::ReplicatedAggregatingMergeTree { .. }) + | Some(ClickhouseEngine::ReplicatedSummingMergeTree { .. }) + ); + + if is_replicated { + let replicas_query = format!( + "SELECT is_readonly, queue_size + FROM system.replicas + WHERE database = '{}' AND table = '{}' + FORMAT JSON", + config.db_name, component.name + ); + + debug!("Executing replicas query: {}", replicas_query); + + let replicas_result = tokio::time::timeout( + std::time::Duration::from_secs(DIAGNOSTIC_QUERY_TIMEOUT_SECS), + client.execute_sql(&replicas_query), + ) + .await + .map_err(|_| DiagnosticError::QueryTimeout(DIAGNOSTIC_QUERY_TIMEOUT_SECS))? 
+ .map_err(|e| DiagnosticError::QueryFailed(format!("{}", e)))?; + + issues.extend(Self::parse_stopped_replication_response( + &replicas_result, + component, + &config.db_name, + )?); + } + + Ok(issues) + } +} diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse/diff_strategy.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/diff_strategy.rs index da040b6700..affaa599a9 100644 --- a/apps/framework-cli/src/infrastructure/olap/clickhouse/diff_strategy.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/diff_strategy.rs @@ -13,6 +13,7 @@ use crate::framework::core::infrastructure_map::{ }; use crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine; use std::collections::HashMap; +use std::mem::discriminant; /// Generates a formatted error message for database field changes. /// @@ -365,7 +366,7 @@ fn is_only_required_change_for_special_column_type(before: &Column, after: &Colu impl ClickHouseTableDiffStrategy { /// Check if a table uses the S3Queue engine pub fn is_s3queue_table(table: &Table) -> bool { - matches!(&table.engine, Some(ClickhouseEngine::S3Queue { .. })) + matches!(&table.engine, ClickhouseEngine::S3Queue { .. }) } /// Check if a SQL resource is a materialized view that needs population @@ -392,7 +393,7 @@ impl ClickHouseTableDiffStrategy { // Skip population in production (user must handle manually) // Only populate in dev for new MVs with non-S3Queue sources if is_new && !has_s3queue_source && !is_production { - log::info!( + tracing::info!( "Adding population operation for materialized view '{}'", sql_resource.name ); @@ -435,7 +436,7 @@ impl TableDiffStrategy for ClickHouseTableDiffStrategy { // Check if ORDER BY has changed let order_by_changed = order_by_change.before != order_by_change.after; if order_by_changed { - log::warn!( + tracing::warn!( "ClickHouse: ORDER BY changed for table '{}', requiring drop+create", before.name ); @@ -462,7 +463,7 @@ impl TableDiffStrategy for ClickHouseTableDiffStrategy { let error_message = format_database_change_error(&before.name, before_db, after_db); - log::error!("{}", error_message); + tracing::error!("{}", error_message); return vec![OlapChange::Table(TableChange::ValidationError { table_name: before.name.clone(), @@ -472,10 +473,16 @@ impl TableDiffStrategy for ClickHouseTableDiffStrategy { })]; } + // Note: cluster_name changes are intentionally NOT treated as requiring drop+create. + // cluster_name is a deployment directive (how to run DDL) rather than a schema property + // (what the table looks like). When cluster_name changes, future DDL operations will + // automatically use the new cluster_name via the ON CLUSTER clause, but the table + // itself doesn't need to be recreated. 
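// A minimal sketch of the behaviour the note above describes (the helper
// `on_cluster_clause` is illustrative, not the actual DDL builder): when
// `cluster_name` is set, subsequent DDL statements simply gain an ON CLUSTER
// suffix, so a change to `cluster_name` alone gives the diff strategy nothing
// to drop or recreate.

fn on_cluster_clause(cluster_name: Option<&str>) -> String {
    cluster_name
        .map(|cluster| format!(" ON CLUSTER {}", cluster))
        .unwrap_or_default()
}

// on_cluster_clause(None)               -> ""
// on_cluster_clause(Some("my_cluster")) -> " ON CLUSTER my_cluster"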
+ // Check if PARTITION BY has changed let partition_by_changed = partition_by_change.before != partition_by_change.after; if partition_by_changed { - log::warn!( + tracing::warn!( "ClickHouse: PARTITION BY changed for table '{}', requiring drop+create", before.name ); @@ -488,15 +495,20 @@ impl TableDiffStrategy for ClickHouseTableDiffStrategy { // SAMPLE BY can be modified via ALTER TABLE; do not force drop+create // Check if primary key structure has changed - let before_primary_keys = before.primary_key_columns(); - let after_primary_keys = after.primary_key_columns(); - if before_primary_keys != after_primary_keys + // Use normalized expressions to handle both primary_key_expression and column-level flags + // This ensures that primary_key_expression: Some("(foo, bar)") is equivalent to + // columns foo, bar marked with primary_key: true + let before_pk_expr = before.normalized_primary_key_expr(); + let after_pk_expr = after.normalized_primary_key_expr(); + if before_pk_expr != after_pk_expr // S3 allows specifying PK, but that information is not in system.columns - && after.engine.as_ref().is_none_or(|e| e.is_merge_tree_family()) + && after.engine.is_merge_tree_family() { - log::warn!( - "ClickHouse: Primary key structure changed for table '{}', requiring drop+create", - before.name + tracing::warn!( + "ClickHouse: Primary key structure changed for table '{}' (before: '{}', after: '{}'), requiring drop+create", + before.name, + before_pk_expr, + after_pk_expr ); return vec![ OlapChange::Table(TableChange::Removed(before.clone())), @@ -504,27 +516,26 @@ impl TableDiffStrategy for ClickHouseTableDiffStrategy { ]; } - // First check if we can use hash comparison for engine changes - let engine_changed = if let (Some(before_hash), Some(after_hash)) = - (&before.engine_params_hash, &after.engine_params_hash) - { - // If both tables have hashes, compare them for change detection - // This includes credentials and other non-alterable parameters - before_hash != after_hash - } else { - // Fallback to direct engine comparison if hashes are not available - let before_engine = before.engine.as_ref(); - match after.engine.as_ref() { - // after.engine is unset -> before engine should be same as default - None => before_engine.is_some_and(|e| *e != ClickhouseEngine::MergeTree), - // force recreate only if engines are different - Some(e) => Some(e) != before_engine, - } - }; + // First make sure the engine type is the kind + // then check if we can use hash comparison for engine changes + let engine_changed = discriminant(&before.engine) != discriminant(&after.engine) + || if let (Some(before_hash), Some(after_hash)) = + (&before.engine_params_hash, &after.engine_params_hash) + { + // If both tables have hashes, compare them for change detection + // This includes credentials and other non-alterable parameters + before_hash != after_hash + } else { + // Fallback to direct engine comparison if hashes are not available + // Note: Tables are already normalized at this point (None -> Some(MergeTree)) + // via normalize_inframap_engines() in the remote plan flow, so we can + // safely use direct comparison + before.engine != after.engine + }; // Check if engine has changed (using hash comparison when available) if engine_changed { - log::warn!( + tracing::warn!( "ClickHouse: engine changed for table '{}', requiring drop+create", before.name ); @@ -533,18 +544,18 @@ impl TableDiffStrategy for ClickHouseTableDiffStrategy { OlapChange::Table(TableChange::Added(after.clone())), ]; } - + let mut changes = 
Vec::new(); // Check if only table settings have changed if before.table_settings != after.table_settings { // List of readonly settings that cannot be modified after table creation // Source: ClickHouse/src/Storages/MergeTree/MergeTreeSettings.cpp::isReadonlySetting - const READONLY_SETTINGS: &[&str] = &[ - "index_granularity", - "index_granularity_bytes", - "enable_mixed_granularity_parts", - "add_minmax_index_for_numeric_columns", - "add_minmax_index_for_string_columns", - "table_disk", + const READONLY_SETTINGS: &[(&str, &str)] = &[ + ("index_granularity", "8192"), + ("index_granularity_bytes", "10485760"), + ("enable_mixed_granularity_parts", "1"), + ("add_minmax_index_for_numeric_columns", "0"), + ("add_minmax_index_for_string_columns", "0"), + ("table_disk", "0"), ]; // Check if any readonly settings have changed @@ -552,12 +563,16 @@ impl TableDiffStrategy for ClickHouseTableDiffStrategy { let before_settings = before.table_settings.as_ref().unwrap_or(&empty_settings); let after_settings = after.table_settings.as_ref().unwrap_or(&empty_settings); - for readonly_setting in READONLY_SETTINGS { - let before_value = before_settings.get(*readonly_setting); - let after_value = after_settings.get(*readonly_setting); + for (readonly_setting, default) in READONLY_SETTINGS { + let before_value = before_settings + .get(*readonly_setting) + .map_or(*default, |v| v); + let after_value = after_settings + .get(*readonly_setting) + .map_or(*default, |v| v); if before_value != after_value { - log::warn!( + tracing::warn!( "ClickHouse: Readonly setting '{}' changed for table '{}' (from {:?} to {:?}), requiring drop+create", readonly_setting, before.name, @@ -571,34 +586,31 @@ impl TableDiffStrategy for ClickHouseTableDiffStrategy { } } - log::debug!( + tracing::debug!( "ClickHouse: Only modifiable table settings changed for table '{}', can use ALTER TABLE MODIFY SETTING", before.name ); // Return the explicit SettingsChanged variant for clarity - return vec![OlapChange::Table(TableChange::SettingsChanged { + changes.push(OlapChange::Table(TableChange::SettingsChanged { name: before.name.clone(), before_settings: before.table_settings.clone(), after_settings: after.table_settings.clone(), table: after.clone(), - })]; + })); } // Check if this is an S3Queue table with column changes // S3Queue only supports MODIFY/RESET SETTING, not column operations - if !column_changes.is_empty() { - if let Some(engine) = &before.engine { - if matches!(engine, ClickhouseEngine::S3Queue { .. }) { - log::warn!( - "ClickHouse: S3Queue table '{}' has column changes, requiring drop+create (S3Queue doesn't support ALTER TABLE for columns)", - before.name - ); - return vec![ - OlapChange::Table(TableChange::Removed(before.clone())), - OlapChange::Table(TableChange::Added(after.clone())), - ]; - } - } + if !column_changes.is_empty() && matches!(&before.engine, ClickhouseEngine::S3Queue { .. }) + { + tracing::warn!( + "ClickHouse: S3Queue table '{}' has column changes, requiring drop+create (S3Queue doesn't support ALTER TABLE for columns)", + before.name + ); + return vec![ + OlapChange::Table(TableChange::Removed(before.clone())), + OlapChange::Table(TableChange::Added(after.clone())), + ]; } // Filter out no-op changes for ClickHouse semantics: @@ -617,18 +629,18 @@ impl TableDiffStrategy for ClickHouseTableDiffStrategy { // For other changes, ClickHouse can handle them via ALTER TABLE. // If there are no column/index/sample_by changes, return an empty vector. 
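// A minimal sketch of the default-aware comparison used by the readonly-setting
// check in the hunk above (the helper name `effective_setting` and the HashMap
// types are illustrative, not the exact types used there): an absent setting is
// treated as its ClickHouse default, so leaving `index_granularity` unset on one
// side and writing "8192" explicitly on the other is not considered a change.

fn effective_setting<'a>(
    settings: &'a std::collections::HashMap<String, String>,
    key: &str,
    default: &'a str,
) -> &'a str {
    settings.get(key).map_or(default, String::as_str)
}

// effective_setting(&before_settings, "index_granularity", "8192")
//     == effective_setting(&after_settings, "index_granularity", "8192")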
let sample_by_changed = before.sample_by != after.sample_by; - if column_changes.is_empty() && before.indexes == after.indexes && !sample_by_changed { - vec![] - } else { - vec![OlapChange::Table(TableChange::Updated { + if !column_changes.is_empty() || before.indexes != after.indexes || sample_by_changed { + changes.push(OlapChange::Table(TableChange::Updated { name: before.name.clone(), column_changes, order_by_change, partition_by_change, before: before.clone(), after: after.clone(), - })] - } + })) + }; + + changes } } @@ -671,10 +683,14 @@ mod tests { order_by: OrderBy::Fields(order_by), partition_by: None, sample_by: None, - engine: deduplicate.then_some(ClickhouseEngine::ReplacingMergeTree { - ver: None, - is_deleted: None, - }), + engine: if deduplicate { + ClickhouseEngine::ReplacingMergeTree { + ver: None, + is_deleted: None, + } + } else { + ClickhouseEngine::MergeTree + }, version: Some(Version::from_string("1.0.0".to_string())), source_primitive: PrimitiveSignature { name: "test".to_string(), @@ -687,6 +703,8 @@ mod tests { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, + primary_key_expression: None, } } @@ -1473,14 +1491,14 @@ mod tests { order_by: OrderBy::Fields(vec![]), partition_by: None, sample_by: None, - engine: Some(ClickhouseEngine::S3Queue { + engine: ClickhouseEngine::S3Queue { s3_path: "s3://bucket/path".to_string(), format: "JSONEachRow".to_string(), compression: None, headers: None, aws_access_key_id: None, aws_secret_access_key: None, - }), + }, version: None, source_primitive: PrimitiveSignature { name: "test_s3".to_string(), @@ -1493,6 +1511,8 @@ mod tests { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, + primary_key_expression: None, }; assert!(ClickHouseTableDiffStrategy::is_s3queue_table(&s3_table)); @@ -1621,4 +1641,579 @@ mod tests { error_msg.contains("INSERT INTO target_db.my_table SELECT * FROM source_db.my_table") ); } + + #[test] + fn test_cluster_change_from_none_to_some() { + let strategy = ClickHouseTableDiffStrategy; + + let mut before = create_test_table("test", vec!["id".to_string()], false); + let mut after = create_test_table("test", vec!["id".to_string()], false); + + // Change cluster from None to Some + before.cluster_name = None; + after.cluster_name = Some("test_cluster".to_string()); + + let order_by_change = OrderByChange { + before: before.order_by.clone(), + after: after.order_by.clone(), + }; + + let partition_by_change = PartitionByChange { + before: before.partition_by.clone(), + after: after.partition_by.clone(), + }; + + let changes = strategy.diff_table_update( + &before, + &after, + vec![], + order_by_change, + partition_by_change, + "local", + ); + + // cluster_name is a deployment directive, not a schema property + // Changing it should not trigger any operations + assert_eq!(changes.len(), 0); + } + + #[test] + fn test_cluster_change_from_some_to_none() { + let strategy = ClickHouseTableDiffStrategy; + + let mut before = create_test_table("test", vec!["id".to_string()], false); + let mut after = create_test_table("test", vec!["id".to_string()], false); + + // Change cluster from Some to None + before.cluster_name = Some("test_cluster".to_string()); + after.cluster_name = None; + + let order_by_change = OrderByChange { + before: before.order_by.clone(), + after: after.order_by.clone(), + }; + + let partition_by_change = PartitionByChange { + before: before.partition_by.clone(), + after: after.partition_by.clone(), + }; + + let changes = 
strategy.diff_table_update( + &before, + &after, + vec![], + order_by_change, + partition_by_change, + "local", + ); + + // cluster_name is a deployment directive, not a schema property + // Changing it should not trigger any operations + assert_eq!(changes.len(), 0); + } + + #[test] + fn test_cluster_change_between_different_clusters() { + let strategy = ClickHouseTableDiffStrategy; + + let mut before = create_test_table("test", vec!["id".to_string()], false); + let mut after = create_test_table("test", vec!["id".to_string()], false); + + // Change cluster from one to another + before.cluster_name = Some("cluster_a".to_string()); + after.cluster_name = Some("cluster_b".to_string()); + + let order_by_change = OrderByChange { + before: before.order_by.clone(), + after: after.order_by.clone(), + }; + + let partition_by_change = PartitionByChange { + before: before.partition_by.clone(), + after: after.partition_by.clone(), + }; + + let changes = strategy.diff_table_update( + &before, + &after, + vec![], + order_by_change, + partition_by_change, + "local", + ); + + // cluster_name is a deployment directive, not a schema property + // Changing it should not trigger any operations + assert_eq!(changes.len(), 0); + } + + #[test] + fn test_no_cluster_change_both_none() { + let strategy = ClickHouseTableDiffStrategy; + + let before = create_test_table("test", vec!["id".to_string()], false); + let after = create_test_table("test", vec!["id".to_string()], false); + + // Both None - no cluster change + assert_eq!(before.cluster_name, None); + assert_eq!(after.cluster_name, None); + + let order_by_change = OrderByChange { + before: before.order_by.clone(), + after: after.order_by.clone(), + }; + + let partition_by_change = PartitionByChange { + before: before.partition_by.clone(), + after: after.partition_by.clone(), + }; + + let changes = strategy.diff_table_update( + &before, + &after, + vec![], + order_by_change, + partition_by_change, + "local", + ); + + // Should not trigger a validation error - no changes at all + assert_eq!(changes.len(), 0); + } + + #[test] + fn test_no_cluster_change_both_same() { + let strategy = ClickHouseTableDiffStrategy; + + let mut before = create_test_table("test", vec!["id".to_string()], false); + let mut after = create_test_table("test", vec!["id".to_string()], false); + + // Both have the same cluster + before.cluster_name = Some("test_cluster".to_string()); + after.cluster_name = Some("test_cluster".to_string()); + + let order_by_change = OrderByChange { + before: before.order_by.clone(), + after: after.order_by.clone(), + }; + + let partition_by_change = PartitionByChange { + before: before.partition_by.clone(), + after: after.partition_by.clone(), + }; + + let changes = strategy.diff_table_update( + &before, + &after, + vec![], + order_by_change, + partition_by_change, + "local", + ); + + // Should not trigger a validation error - no changes at all + assert_eq!(changes.len(), 0); + } + + #[test] + fn test_primary_key_change_requires_drop_create() { + let strategy = ClickHouseTableDiffStrategy; + + let mut before = create_test_table("test", vec!["id".to_string()], false); + let mut after = create_test_table("test", vec!["id".to_string()], false); + + // Change primary key: before has id, after has timestamp + before.columns[0].primary_key = true; + before.columns[1].primary_key = false; + after.columns[0].primary_key = false; + after.columns[1].primary_key = true; + + let order_by_change = OrderByChange { + before: before.order_by.clone(), + after: 
after.order_by.clone(), + }; + + let partition_by_change = PartitionByChange { + before: before.partition_by.clone(), + after: after.partition_by.clone(), + }; + + let changes = strategy.diff_table_update( + &before, + &after, + vec![], + order_by_change, + partition_by_change, + "local", + ); + + // Primary key change requires drop+create + assert_eq!(changes.len(), 2); + assert!(matches!( + changes[0], + OlapChange::Table(TableChange::Removed(_)) + )); + assert!(matches!( + changes[1], + OlapChange::Table(TableChange::Added(_)) + )); + } + + #[test] + fn test_primary_key_expression_equivalent_to_column_flags() { + let strategy = ClickHouseTableDiffStrategy; + + let mut before = create_test_table("test", vec!["id".to_string()], false); + let mut after = create_test_table("test", vec!["id".to_string()], false); + + // Before: use column-level primary_key flags for id and timestamp + before.columns[0].primary_key = true; + before.columns[1].primary_key = true; + + // After: use primary_key_expression with same columns + after.columns[0].primary_key = false; + after.columns[1].primary_key = false; + after.primary_key_expression = Some("(id, timestamp)".to_string()); + + let order_by_change = OrderByChange { + before: before.order_by.clone(), + after: after.order_by.clone(), + }; + + let partition_by_change = PartitionByChange { + before: before.partition_by.clone(), + after: after.partition_by.clone(), + }; + + let changes = strategy.diff_table_update( + &before, + &after, + vec![], + order_by_change, + partition_by_change, + "local", + ); + + // Should NOT trigger drop+create since primary keys are semantically equivalent + assert_eq!(changes.len(), 0); + } + + #[test] + fn test_primary_key_expression_single_column() { + let strategy = ClickHouseTableDiffStrategy; + + let mut before = create_test_table("test", vec!["id".to_string()], false); + let mut after = create_test_table("test", vec!["id".to_string()], false); + + // Before: use column-level primary_key flag for single column + before.columns[0].primary_key = true; + + // After: use primary_key_expression with same single column + after.columns[0].primary_key = false; + after.primary_key_expression = Some("id".to_string()); + + let order_by_change = OrderByChange { + before: before.order_by.clone(), + after: after.order_by.clone(), + }; + + let partition_by_change = PartitionByChange { + before: before.partition_by.clone(), + after: after.partition_by.clone(), + }; + + let changes = strategy.diff_table_update( + &before, + &after, + vec![], + order_by_change, + partition_by_change, + "local", + ); + + // Should NOT trigger drop+create since primary keys are semantically equivalent + assert_eq!(changes.len(), 0); + } + + #[test] + fn test_primary_key_expression_with_extra_spaces() { + let strategy = ClickHouseTableDiffStrategy; + + let mut before = create_test_table("test", vec!["id".to_string()], false); + let mut after = create_test_table("test", vec!["id".to_string()], false); + + // Before: primary_key_expression with no spaces + before.columns[0].primary_key = false; + before.columns[1].primary_key = false; + before.primary_key_expression = Some("(id,timestamp)".to_string()); + + // After: primary_key_expression with spaces (should be normalized the same) + after.columns[0].primary_key = false; + after.columns[1].primary_key = false; + after.primary_key_expression = Some("( id , timestamp )".to_string()); + + let order_by_change = OrderByChange { + before: before.order_by.clone(), + after: after.order_by.clone(), + }; + + let 
partition_by_change = PartitionByChange { + before: before.partition_by.clone(), + after: after.partition_by.clone(), + }; + + let changes = strategy.diff_table_update( + &before, + &after, + vec![], + order_by_change, + partition_by_change, + "local", + ); + + // Should NOT trigger drop+create since both normalize to the same expression + assert_eq!(changes.len(), 0); + } + + #[test] + fn test_primary_key_expression_different_order_requires_drop_create() { + let strategy = ClickHouseTableDiffStrategy; + + let mut before = create_test_table("test", vec!["id".to_string()], false); + let mut after = create_test_table("test", vec!["id".to_string()], false); + + // Before: primary key is (id, timestamp) + before.columns[0].primary_key = true; + before.columns[1].primary_key = true; + + // After: primary key is (timestamp, id) - different order + after.columns[0].primary_key = false; + after.columns[1].primary_key = false; + after.primary_key_expression = Some("(timestamp, id)".to_string()); + + let order_by_change = OrderByChange { + before: before.order_by.clone(), + after: after.order_by.clone(), + }; + + let partition_by_change = PartitionByChange { + before: before.partition_by.clone(), + after: after.partition_by.clone(), + }; + + let changes = strategy.diff_table_update( + &before, + &after, + vec![], + order_by_change, + partition_by_change, + "local", + ); + + // Different order requires drop+create + assert_eq!(changes.len(), 2); + assert!(matches!( + changes[0], + OlapChange::Table(TableChange::Removed(_)) + )); + assert!(matches!( + changes[1], + OlapChange::Table(TableChange::Added(_)) + )); + } + + #[test] + fn test_primary_key_expression_with_function() { + let strategy = ClickHouseTableDiffStrategy; + + let mut before = create_test_table("test", vec!["id".to_string()], false); + let mut after = create_test_table("test", vec!["id".to_string()], false); + + // Before: simple column-level primary key + before.columns[0].primary_key = true; + + // After: primary key with function expression + after.columns[0].primary_key = false; + after.primary_key_expression = Some("(id, cityHash64(timestamp))".to_string()); + + let order_by_change = OrderByChange { + before: before.order_by.clone(), + after: after.order_by.clone(), + }; + + let partition_by_change = PartitionByChange { + before: before.partition_by.clone(), + after: after.partition_by.clone(), + }; + + let changes = strategy.diff_table_update( + &before, + &after, + vec![], + order_by_change, + partition_by_change, + "local", + ); + + // Different primary key (function vs simple column) requires drop+create + assert_eq!(changes.len(), 2); + assert!(matches!( + changes[0], + OlapChange::Table(TableChange::Removed(_)) + )); + assert!(matches!( + changes[1], + OlapChange::Table(TableChange::Added(_)) + )); + } + + #[test] + fn test_primary_key_expression_single_column_with_parens() { + let strategy = ClickHouseTableDiffStrategy; + + let mut before = create_test_table("test", vec!["id".to_string()], false); + let mut after = create_test_table("test", vec!["id".to_string()], false); + + // Before: use column-level primary_key flag for single column + before.columns[0].primary_key = true; + + // After: use primary_key_expression with parentheses around single column + // In ClickHouse, (col) and col are semantically equivalent for PRIMARY KEY + after.columns[0].primary_key = false; + after.primary_key_expression = Some("(id)".to_string()); + + let order_by_change = OrderByChange { + before: before.order_by.clone(), + after: 
after.order_by.clone(), + }; + + let partition_by_change = PartitionByChange { + before: before.partition_by.clone(), + after: after.partition_by.clone(), + }; + + let changes = strategy.diff_table_update( + &before, + &after, + vec![], + order_by_change, + partition_by_change, + "local", + ); + + // Should NOT trigger drop+create since (id) and id are semantically equivalent + assert_eq!(changes.len(), 0); + } + + #[test] + fn test_primary_key_expression_function_with_parens() { + let strategy = ClickHouseTableDiffStrategy; + + let mut before = create_test_table("test", vec!["id".to_string()], false); + let mut after = create_test_table("test", vec!["id".to_string()], false); + + // Both use primary_key_expression with a function wrapped in parens + before.columns[0].primary_key = false; + before.primary_key_expression = Some("(cityHash64(id))".to_string()); + + after.columns[0].primary_key = false; + after.primary_key_expression = Some("cityHash64(id)".to_string()); + + let order_by_change = OrderByChange { + before: before.order_by.clone(), + after: after.order_by.clone(), + }; + + let partition_by_change = PartitionByChange { + before: before.partition_by.clone(), + after: after.partition_by.clone(), + }; + + let changes = strategy.diff_table_update( + &before, + &after, + vec![], + order_by_change, + partition_by_change, + "local", + ); + + // Should NOT trigger drop+create since (expr) and expr are semantically equivalent + assert_eq!(changes.len(), 0); + } + + #[test] + fn test_primary_key_multi_column_keeps_parens() { + let strategy = ClickHouseTableDiffStrategy; + + let mut before = create_test_table("test", vec!["id".to_string()], false); + let mut after = create_test_table("test", vec!["id".to_string()], false); + + // Both have multi-column primary keys - should keep parentheses + before.columns[0].primary_key = true; + before.columns[1].primary_key = true; + + after.columns[0].primary_key = false; + after.columns[1].primary_key = false; + after.primary_key_expression = Some("(id,timestamp)".to_string()); + + let order_by_change = OrderByChange { + before: before.order_by.clone(), + after: after.order_by.clone(), + }; + + let partition_by_change = PartitionByChange { + before: before.partition_by.clone(), + after: after.partition_by.clone(), + }; + + let changes = strategy.diff_table_update( + &before, + &after, + vec![], + order_by_change, + partition_by_change, + "local", + ); + + // Should NOT trigger drop+create - both normalize to (id,timestamp) + assert_eq!(changes.len(), 0); + } + + #[test] + fn test_primary_key_nested_function_parens() { + let strategy = ClickHouseTableDiffStrategy; + + let mut before = create_test_table("test", vec!["id".to_string()], false); + let mut after = create_test_table("test", vec!["id".to_string()], false); + + // Test that nested parentheses in functions are preserved correctly + before.columns[0].primary_key = false; + before.primary_key_expression = Some("(cityHash64(id, timestamp))".to_string()); + + after.columns[0].primary_key = false; + after.primary_key_expression = Some("cityHash64(id, timestamp)".to_string()); + + let order_by_change = OrderByChange { + before: before.order_by.clone(), + after: after.order_by.clone(), + }; + + let partition_by_change = PartitionByChange { + before: before.partition_by.clone(), + after: after.partition_by.clone(), + }; + + let changes = strategy.diff_table_update( + &before, + &after, + vec![], + order_by_change, + partition_by_change, + "local", + ); + + // Should NOT trigger drop+create - both are 
the same function, just with/without outer parens + assert_eq!(changes.len(), 0); + } } diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse/inserter.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/inserter.rs index a776c40503..da12c6148d 100644 --- a/apps/framework-cli/src/infrastructure/olap/clickhouse/inserter.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/inserter.rs @@ -47,8 +47,8 @@ use crate::infrastructure::olap::clickhouse::client::ClickHouseClientTrait; use crate::infrastructure::olap::clickhouse::model::ClickHouseRecord; use std::collections::{HashMap, VecDeque}; -use log::{info, warn}; use rdkafka::error::KafkaError; +use tracing::{info, warn}; /// Represents a Kafka partition identifier type Partition = i32; diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse/mapper.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/mapper.rs index 9cf81a5c1a..21f23e6624 100644 --- a/apps/framework-cli/src/infrastructure/olap/clickhouse/mapper.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/mapper.rs @@ -10,7 +10,6 @@ use crate::infrastructure::olap::clickhouse::model::{ }; use super::errors::ClickhouseError; -use super::queries::ClickhouseEngine; /// Generates a column comment, preserving any existing user comment and adding/updating metadata for enums fn generate_column_comment(column: &Column) -> Result, ClickhouseError> { @@ -328,10 +327,7 @@ pub fn std_columns_to_clickhouse_columns( pub fn std_table_to_clickhouse_table(table: &Table) -> Result { let columns = std_columns_to_clickhouse_columns(&table.columns)?; - let clickhouse_engine = match &table.engine { - Some(engine) => engine.clone(), - None => ClickhouseEngine::MergeTree, - }; + let clickhouse_engine = table.engine.clone(); Ok(ClickHouseTable { name: table.name.clone(), @@ -354,6 +350,8 @@ pub fn std_table_to_clickhouse_table(table: &Table) -> Result, + /// Optional cluster name for ON CLUSTER support + cluster_name: Option, }, /// Add a column to a table AddTableColumn { @@ -129,6 +134,8 @@ pub enum SerializableOlapOperation { after_column: Option, /// The database containing the table (None means use primary database) database: Option, + /// Optional cluster name for ON CLUSTER support + cluster_name: Option, }, /// Drop a column from a table DropTableColumn { @@ -138,6 +145,8 @@ pub enum SerializableOlapOperation { column_name: String, /// The database containing the table (None means use primary database) database: Option, + /// Optional cluster name for ON CLUSTER support + cluster_name: Option, }, /// Modify a column in a table ModifyTableColumn { @@ -149,6 +158,8 @@ pub enum SerializableOlapOperation { after_column: Column, /// The database containing the table (None means use primary database) database: Option, + /// Optional cluster name for ON CLUSTER support + cluster_name: Option, }, RenameTableColumn { /// The table containing the column @@ -159,6 +170,8 @@ pub enum SerializableOlapOperation { after_column_name: String, /// The database containing the table (None means use primary database) database: Option, + /// Optional cluster name for ON CLUSTER support + cluster_name: Option, }, /// Modify table settings using ALTER TABLE MODIFY SETTING ModifyTableSettings { @@ -170,6 +183,8 @@ pub enum SerializableOlapOperation { after_settings: Option>, /// The database containing the table (None means use primary database) database: Option, + /// Optional cluster name for ON CLUSTER support + cluster_name: Option, }, /// Modify or remove table-level 
TTL ModifyTableTtl { @@ -178,29 +193,39 @@ pub enum SerializableOlapOperation { after: Option, /// The database containing the table (None means use primary database) database: Option, + /// Optional cluster name for ON CLUSTER support + cluster_name: Option, }, AddTableIndex { table: String, index: TableIndex, /// The database containing the table (None means use primary database) database: Option, + /// Optional cluster name for ON CLUSTER support + cluster_name: Option, }, DropTableIndex { table: String, index_name: String, /// The database containing the table (None means use primary database) database: Option, + /// Optional cluster name for ON CLUSTER support + cluster_name: Option, }, ModifySampleBy { table: String, expression: String, /// The database containing the table (None means use primary database) database: Option, + /// Optional cluster name for ON CLUSTER support + cluster_name: Option, }, RemoveSampleBy { table: String, /// The database containing the table (None means use primary database) database: Option, + /// Optional cluster name for ON CLUSTER support + cluster_name: Option, }, RawSql { /// The SQL statements to execute @@ -436,41 +461,78 @@ pub async fn execute_atomic_operation( SerializableOlapOperation::CreateTable { table } => { execute_create_table(db_name, table, client, is_dev).await?; } - SerializableOlapOperation::DropTable { table, database } => { - execute_drop_table(db_name, table, database.as_deref(), client).await?; + SerializableOlapOperation::DropTable { + table, + database, + cluster_name, + } => { + execute_drop_table( + db_name, + table, + database.as_deref(), + cluster_name.as_deref(), + client, + ) + .await?; } SerializableOlapOperation::AddTableColumn { table, column, after_column, database, + cluster_name, } => { let target_db = database.as_deref().unwrap_or(db_name); - execute_add_table_column(target_db, table, column, after_column, client).await?; + execute_add_table_column( + target_db, + table, + column, + after_column, + cluster_name.as_deref(), + client, + ) + .await?; } SerializableOlapOperation::DropTableColumn { table, column_name, database, + cluster_name, } => { let target_db = database.as_deref().unwrap_or(db_name); - execute_drop_table_column(target_db, table, column_name, client).await?; + execute_drop_table_column( + target_db, + table, + column_name, + cluster_name.as_deref(), + client, + ) + .await?; } SerializableOlapOperation::ModifyTableColumn { table, before_column, after_column, database, + cluster_name, } => { let target_db = database.as_deref().unwrap_or(db_name); - execute_modify_table_column(target_db, table, before_column, after_column, client) - .await?; + execute_modify_table_column( + target_db, + table, + before_column, + after_column, + cluster_name.as_deref(), + client, + ) + .await?; } SerializableOlapOperation::RenameTableColumn { table, before_column_name, after_column_name, database, + cluster_name, } => { let target_db = database.as_deref().unwrap_or(db_name); execute_rename_table_column( @@ -478,6 +540,7 @@ pub async fn execute_atomic_operation( table, before_column_name, after_column_name, + cluster_name.as_deref(), client, ) .await?; @@ -487,6 +550,7 @@ pub async fn execute_atomic_operation( before_settings, after_settings, database, + cluster_name, } => { let target_db = database.as_deref().unwrap_or(db_name); execute_modify_table_settings( @@ -494,6 +558,7 @@ pub async fn execute_atomic_operation( table, before_settings, after_settings, + cluster_name.as_deref(), client, ) .await?; @@ -503,16 
+568,24 @@ pub async fn execute_atomic_operation( before: _, after, database, + cluster_name, } => { let target_db = database.as_deref().unwrap_or(db_name); // Build ALTER TABLE ... [REMOVE TTL | MODIFY TTL expr] + let cluster_clause = cluster_name + .as_ref() + .map(|c| format!(" ON CLUSTER {}", c)) + .unwrap_or_default(); let sql = if let Some(expr) = after { format!( - "ALTER TABLE `{}`.`{}` MODIFY TTL {}", - target_db, table, expr + "ALTER TABLE `{}`.`{}`{} MODIFY TTL {}", + target_db, table, cluster_clause, expr ) } else { - format!("ALTER TABLE `{}`.`{}` REMOVE TTL", target_db, table) + format!( + "ALTER TABLE `{}`.`{}`{} REMOVE TTL", + target_db, table, cluster_clause + ) }; run_query(&sql, client).await.map_err(|e| { ClickhouseChangesError::ClickhouseClient { @@ -525,29 +598,51 @@ pub async fn execute_atomic_operation( table, index, database, + cluster_name, } => { let target_db = database.as_deref().unwrap_or(db_name); - execute_add_table_index(target_db, table, index, client).await?; + execute_add_table_index(target_db, table, index, cluster_name.as_deref(), client) + .await?; } SerializableOlapOperation::DropTableIndex { table, index_name, database, + cluster_name, } => { let target_db = database.as_deref().unwrap_or(db_name); - execute_drop_table_index(target_db, table, index_name, client).await?; + execute_drop_table_index( + target_db, + table, + index_name, + cluster_name.as_deref(), + client, + ) + .await?; } SerializableOlapOperation::ModifySampleBy { table, expression, database, + cluster_name, } => { let target_db = database.as_deref().unwrap_or(db_name); - execute_modify_sample_by(target_db, table, expression, client).await?; + execute_modify_sample_by( + target_db, + table, + expression, + cluster_name.as_deref(), + client, + ) + .await?; } - SerializableOlapOperation::RemoveSampleBy { table, database } => { + SerializableOlapOperation::RemoveSampleBy { + table, + database, + cluster_name, + } => { let target_db = database.as_deref().unwrap_or(db_name); - execute_remove_sample_by(target_db, table, client).await?; + execute_remove_sample_by(target_db, table, cluster_name.as_deref(), client).await?; } SerializableOlapOperation::RawSql { sql, description } => { execute_raw_sql(sql, description, client).await?; @@ -562,7 +657,7 @@ async fn execute_create_table( client: &ConfiguredDBClient, is_dev: bool, ) -> Result<(), ClickhouseChangesError> { - log::info!("Executing CreateTable: {:?}", table.id(db_name)); + tracing::info!("Executing CreateTable: {:?}", table.id(db_name)); let clickhouse_table = std_table_to_clickhouse_table(table)?; // Use table's database if specified, otherwise use global database let target_database = table.database.as_deref().unwrap_or(db_name); @@ -580,6 +675,7 @@ async fn execute_add_table_index( db_name: &str, table_name: &str, index: &TableIndex, + cluster_name: Option<&str>, client: &ConfiguredDBClient, ) -> Result<(), ClickhouseChangesError> { let args = if index.arguments.is_empty() { @@ -587,10 +683,14 @@ async fn execute_add_table_index( } else { format!("({})", index.arguments.join(", ")) }; + let cluster_clause = cluster_name + .map(|c| format!(" ON CLUSTER {}", c)) + .unwrap_or_default(); let sql = format!( - "ALTER TABLE `{}`.`{}` ADD INDEX `{}` {} TYPE {}{} GRANULARITY {}", + "ALTER TABLE `{}`.`{}`{} ADD INDEX `{}` {} TYPE {}{} GRANULARITY {}", db_name, table_name, + cluster_clause, index.name, index.expression, index.index_type, @@ -609,11 +709,15 @@ async fn execute_drop_table_index( db_name: &str, table_name: &str, index_name: &str, 
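// Editor's sketch (not part of the patch): the MODIFY TTL / REMOVE TTL choice made in the
// ModifyTableTtl arm above, reduced to a standalone function so the two SQL shapes are easy
// to compare. The helper name and sample values are hypothetical.
fn table_ttl_sql(db: &str, table: &str, ttl: Option<&str>) -> String {
    match ttl {
        Some(expr) => format!("ALTER TABLE `{}`.`{}` MODIFY TTL {}", db, table, expr),
        None => format!("ALTER TABLE `{}`.`{}` REMOVE TTL", db, table),
    }
}

fn main() {
    assert_eq!(
        table_ttl_sql("db", "events", Some("created_at + INTERVAL 30 DAY")),
        "ALTER TABLE `db`.`events` MODIFY TTL created_at + INTERVAL 30 DAY"
    );
    assert_eq!(
        table_ttl_sql("db", "events", None),
        "ALTER TABLE `db`.`events` REMOVE TTL"
    );
}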
+ cluster_name: Option<&str>, client: &ConfiguredDBClient, ) -> Result<(), ClickhouseChangesError> { + let cluster_clause = cluster_name + .map(|c| format!(" ON CLUSTER {}", c)) + .unwrap_or_default(); let sql = format!( - "ALTER TABLE `{}`.`{}` DROP INDEX `{}`", - db_name, table_name, index_name + "ALTER TABLE `{}`.`{}`{} DROP INDEX `{}`", + db_name, table_name, cluster_clause, index_name ); run_query(&sql, client) .await @@ -627,11 +731,15 @@ async fn execute_modify_sample_by( db_name: &str, table_name: &str, expression: &str, + cluster_name: Option<&str>, client: &ConfiguredDBClient, ) -> Result<(), ClickhouseChangesError> { + let cluster_clause = cluster_name + .map(|c| format!(" ON CLUSTER {}", c)) + .unwrap_or_default(); let sql = format!( - "ALTER TABLE `{}`.`{}` MODIFY SAMPLE BY {}", - db_name, table_name, expression + "ALTER TABLE `{}`.`{}`{} MODIFY SAMPLE BY {}", + db_name, table_name, cluster_clause, expression ); run_query(&sql, client) .await @@ -644,11 +752,15 @@ async fn execute_modify_sample_by( async fn execute_remove_sample_by( db_name: &str, table_name: &str, + cluster_name: Option<&str>, client: &ConfiguredDBClient, ) -> Result<(), ClickhouseChangesError> { + let cluster_clause = cluster_name + .map(|c| format!(" ON CLUSTER {}", c)) + .unwrap_or_default(); let sql = format!( - "ALTER TABLE `{}`.`{}` REMOVE SAMPLE BY", - db_name, table_name + "ALTER TABLE `{}`.`{}`{} REMOVE SAMPLE BY", + db_name, table_name, cluster_clause ); run_query(&sql, client) .await @@ -662,12 +774,13 @@ async fn execute_drop_table( db_name: &str, table_name: &str, table_database: Option<&str>, + cluster_name: Option<&str>, client: &ConfiguredDBClient, ) -> Result<(), ClickhouseChangesError> { - log::info!("Executing DropTable: {:?}", table_name); + tracing::info!("Executing DropTable: {:?}", table_name); // Use table's database if specified, otherwise use global database let target_database = table_database.unwrap_or(db_name); - let drop_query = drop_table_query(target_database, table_name)?; + let drop_query = drop_table_query(target_database, table_name, cluster_name)?; run_query(&drop_query, client) .await .map_err(|e| ClickhouseChangesError::ClickhouseClient { @@ -687,9 +800,10 @@ async fn execute_add_table_column( table_name: &str, column: &Column, after_column: &Option, + cluster_name: Option<&str>, client: &ConfiguredDBClient, ) -> Result<(), ClickhouseChangesError> { - log::info!( + tracing::info!( "Executing AddTableColumn for table: {}, column: {}, after: {:?}", table_name, column.name, @@ -705,10 +819,15 @@ async fn execute_add_table_column( .map(|d| format!(" DEFAULT {}", d)) .unwrap_or_default(); + let cluster_clause = cluster_name + .map(|c| format!(" ON CLUSTER {}", c)) + .unwrap_or_default(); + let add_column_query = format!( - "ALTER TABLE `{}`.`{}` ADD COLUMN `{}` {}{} {}", + "ALTER TABLE `{}`.`{}`{} ADD COLUMN `{}` {}{} {}", db_name, table_name, + cluster_clause, clickhouse_column.name, column_type_string, default_clause, @@ -717,7 +836,7 @@ async fn execute_add_table_column( Some(after_col) => format!("AFTER `{after_col}`"), } ); - log::debug!("Adding column: {}", add_column_query); + tracing::debug!("Adding column: {}", add_column_query); run_query(&add_column_query, client).await.map_err(|e| { ClickhouseChangesError::ClickhouseClient { error: e, @@ -731,18 +850,22 @@ async fn execute_drop_table_column( db_name: &str, table_name: &str, column_name: &str, + cluster_name: Option<&str>, client: &ConfiguredDBClient, ) -> Result<(), ClickhouseChangesError> { - log::info!( + 
tracing::info!( "Executing DropTableColumn for table: {}, column: {}", table_name, column_name ); + let cluster_clause = cluster_name + .map(|c| format!(" ON CLUSTER {}", c)) + .unwrap_or_default(); let drop_column_query = format!( - "ALTER TABLE `{}`.`{}` DROP COLUMN IF EXISTS `{}`", - db_name, table_name, column_name + "ALTER TABLE `{}`.`{}`{} DROP COLUMN IF EXISTS `{}`", + db_name, table_name, cluster_clause, column_name ); - log::debug!("Dropping column: {}", drop_column_query); + tracing::debug!("Dropping column: {}", drop_column_query); run_query(&drop_column_query, client).await.map_err(|e| { ClickhouseChangesError::ClickhouseClient { error: e, @@ -763,6 +886,7 @@ async fn execute_modify_table_column( table_name: &str, before_column: &Column, after_column: &Column, + cluster_name: Option<&str>, client: &ConfiguredDBClient, ) -> Result<(), ClickhouseChangesError> { // Check if only the comment has changed @@ -780,7 +904,7 @@ async fn execute_modify_table_column( && !ttl_changed && comment_changed { - log::info!( + tracing::info!( "Executing comment-only modification for table: {}, column: {}", table_name, after_column.name @@ -790,16 +914,31 @@ async fn execute_modify_table_column( let clickhouse_column = std_column_to_clickhouse_column(after_column.clone())?; if let Some(ref comment) = clickhouse_column.comment { - execute_modify_column_comment(db_name, table_name, after_column, comment, client) - .await?; + execute_modify_column_comment( + db_name, + table_name, + after_column, + comment, + cluster_name, + client, + ) + .await?; } else { // If the new comment is None, we still need to update to remove the old comment - execute_modify_column_comment(db_name, table_name, after_column, "", client).await?; + execute_modify_column_comment( + db_name, + table_name, + after_column, + "", + cluster_name, + client, + ) + .await?; } return Ok(()); } - log::info!( + tracing::info!( "Executing ModifyTableColumn for table: {}, column: {} ({}→{})\ data_type_changed: {data_type_changed}, default_changed: {default_changed}, required_changed: {required_changed}, comment_changed: {comment_changed}, ttl_changed: {ttl_changed}", table_name, @@ -820,11 +959,12 @@ data_type_changed: {data_type_changed}, default_changed: {default_changed}, requ &clickhouse_column, removing_default, removing_ttl, + cluster_name, )?; // Execute all statements in order for query in queries { - log::debug!("Modifying column: {}", query); + tracing::debug!("Modifying column: {}", query); run_query(&query, client) .await .map_err(|e| ClickhouseChangesError::ClickhouseClient { @@ -845,18 +985,19 @@ async fn execute_modify_column_comment( table_name: &str, column: &Column, comment: &str, + cluster_name: Option<&str>, client: &ConfiguredDBClient, ) -> Result<(), ClickhouseChangesError> { - log::info!( + tracing::info!( "Executing ModifyColumnComment for table: {}, column: {}", table_name, column.name ); let modify_comment_query = - build_modify_column_comment_sql(db_name, table_name, &column.name, comment)?; + build_modify_column_comment_sql(db_name, table_name, &column.name, comment, cluster_name)?; - log::debug!("Modifying column comment: {}", modify_comment_query); + tracing::debug!("Modifying column comment: {}", modify_comment_query); run_query(&modify_comment_query, client) .await .map_err(|e| ClickhouseChangesError::ClickhouseClient { @@ -872,25 +1013,30 @@ fn build_modify_column_sql( ch_col: &ClickHouseColumn, removing_default: bool, removing_ttl: bool, + cluster_name: Option<&str>, ) -> Result, ClickhouseChangesError> { 
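// Editor's sketch (not part of the patch): the ON CLUSTER handling threaded through these
// ALTER helpers reduces to the same small expansion everywhere. The helper name below is
// hypothetical; the patch inlines the map/unwrap_or_default at each call site.
fn cluster_clause(cluster_name: Option<&str>) -> String {
    cluster_name
        .map(|c| format!(" ON CLUSTER {}", c))
        .unwrap_or_default()
}

fn main() {
    let sql = format!(
        "ALTER TABLE `{}`.`{}`{} DROP COLUMN IF EXISTS `{}`",
        "db",
        "events",
        cluster_clause(Some("my_cluster")),
        "old_col"
    );
    assert_eq!(
        sql,
        "ALTER TABLE `db`.`events` ON CLUSTER my_cluster DROP COLUMN IF EXISTS `old_col`"
    );
    // Without a cluster the generated statements are unchanged.
    assert_eq!(cluster_clause(None), "");
}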
let column_type_string = basic_field_type_to_string(&ch_col.column_type)?; + let cluster_clause = cluster_name + .map(|c| format!(" ON CLUSTER {}", c)) + .unwrap_or_default(); + let mut statements = vec![]; // Add REMOVE DEFAULT statement if needed // ClickHouse doesn't allow mixing column properties with REMOVE clauses if removing_default { statements.push(format!( - "ALTER TABLE `{}`.`{}` MODIFY COLUMN `{}` REMOVE DEFAULT", - db_name, table_name, ch_col.name + "ALTER TABLE `{}`.`{}`{} MODIFY COLUMN `{}` REMOVE DEFAULT", + db_name, table_name, cluster_clause, ch_col.name )); } // Add REMOVE TTL statement if needed if removing_ttl { statements.push(format!( - "ALTER TABLE `{}`.`{}` MODIFY COLUMN `{}` REMOVE TTL", - db_name, table_name, ch_col.name + "ALTER TABLE `{}`.`{}`{} MODIFY COLUMN `{}` REMOVE TTL", + db_name, table_name, cluster_clause, ch_col.name )); } @@ -919,9 +1065,10 @@ fn build_modify_column_sql( let main_sql = if let Some(ref comment) = ch_col.comment { let escaped_comment = comment.replace('\'', "''"); format!( - "ALTER TABLE `{}`.`{}` MODIFY COLUMN IF EXISTS `{}` {}{}{} COMMENT '{}'", + "ALTER TABLE `{}`.`{}`{} MODIFY COLUMN IF EXISTS `{}` {}{}{} COMMENT '{}'", db_name, table_name, + cluster_clause, ch_col.name, column_type_string, default_clause, @@ -930,8 +1077,14 @@ fn build_modify_column_sql( ) } else { format!( - "ALTER TABLE `{}`.`{}` MODIFY COLUMN IF EXISTS `{}` {}{}{}", - db_name, table_name, ch_col.name, column_type_string, default_clause, ttl_clause + "ALTER TABLE `{}`.`{}`{} MODIFY COLUMN IF EXISTS `{}` {}{}{}", + db_name, + table_name, + cluster_clause, + ch_col.name, + column_type_string, + default_clause, + ttl_clause ) }; statements.push(main_sql); @@ -944,11 +1097,15 @@ fn build_modify_column_comment_sql( table_name: &str, column_name: &str, comment: &str, + cluster_name: Option<&str>, ) -> Result { let escaped_comment = comment.replace('\'', "''"); + let cluster_clause = cluster_name + .map(|c| format!(" ON CLUSTER {}", c)) + .unwrap_or_default(); Ok(format!( - "ALTER TABLE `{}`.`{}` MODIFY COLUMN `{}` COMMENT '{}'", - db_name, table_name, column_name, escaped_comment + "ALTER TABLE `{}`.`{}`{} MODIFY COLUMN `{}` COMMENT '{}'", + db_name, table_name, cluster_clause, column_name, escaped_comment )) } @@ -958,6 +1115,7 @@ async fn execute_modify_table_settings( table_name: &str, before_settings: &Option>, after_settings: &Option>, + cluster_name: Option<&str>, client: &ConfiguredDBClient, ) -> Result<(), ClickhouseChangesError> { use std::collections::HashMap; @@ -981,7 +1139,7 @@ async fn execute_modify_table_settings( } } - log::info!( + tracing::info!( "Executing ModifyTableSettings for table: {} - modifying {} settings, resetting {} settings", table_name, settings_to_modify.len(), @@ -990,9 +1148,13 @@ async fn execute_modify_table_settings( // Execute MODIFY SETTING if there are settings to modify if !settings_to_modify.is_empty() { - let alter_settings_query = - alter_table_modify_settings_query(db_name, table_name, &settings_to_modify)?; - log::debug!("Modifying table settings: {}", alter_settings_query); + let alter_settings_query = alter_table_modify_settings_query( + db_name, + table_name, + &settings_to_modify, + cluster_name, + )?; + tracing::debug!("Modifying table settings: {}", alter_settings_query); run_query(&alter_settings_query, client) .await @@ -1004,9 +1166,13 @@ async fn execute_modify_table_settings( // Execute RESET SETTING if there are settings to reset if !settings_to_reset.is_empty() { - let reset_settings_query = - 
alter_table_reset_settings_query(db_name, table_name, &settings_to_reset)?; - log::debug!("Resetting table settings: {}", reset_settings_query); + let reset_settings_query = alter_table_reset_settings_query( + db_name, + table_name, + &settings_to_reset, + cluster_name, + )?; + tracing::debug!("Resetting table settings: {}", reset_settings_query); run_query(&reset_settings_query, client) .await @@ -1025,18 +1191,22 @@ async fn execute_rename_table_column( table_name: &str, before_column_name: &str, after_column_name: &str, + cluster_name: Option<&str>, client: &ConfiguredDBClient, ) -> Result<(), ClickhouseChangesError> { - log::info!( + tracing::info!( "Executing RenameTableColumn for table: {}, column: {} → {}", table_name, before_column_name, after_column_name ); + let cluster_clause = cluster_name + .map(|c| format!(" ON CLUSTER {}", c)) + .unwrap_or_default(); let rename_column_query = format!( - "ALTER TABLE `{db_name}`.`{table_name}` RENAME COLUMN `{before_column_name}` TO `{after_column_name}`" + "ALTER TABLE `{db_name}`.`{table_name}`{cluster_clause} RENAME COLUMN `{before_column_name}` TO `{after_column_name}`" ); - log::debug!("Renaming column: {}", rename_column_query); + tracing::debug!("Renaming column: {}", rename_column_query); run_query(&rename_column_query, client).await.map_err(|e| { ClickhouseChangesError::ClickhouseClient { error: e, @@ -1052,14 +1222,14 @@ async fn execute_raw_sql( description: &str, client: &ConfiguredDBClient, ) -> Result<(), ClickhouseChangesError> { - log::info!( + tracing::info!( "Executing {} raw SQL statements. {}", sql_statements.len(), description ); for (i, sql) in sql_statements.iter().enumerate() { if !sql.trim().is_empty() { - log::debug!("Executing SQL statement {}: {}", i + 1, sql); + tracing::debug!("Executing SQL statement {}: {}", i + 1, sql); run_query(sql, client) .await .map_err(|e| ClickhouseChangesError::ClickhouseClient { @@ -1328,49 +1498,6 @@ pub async fn fetch_tables_with_version( Ok(tables) } -/// Gets the number of rows in a table -/// -/// # Arguments -/// * `table_name` - Name of the table to check -/// * `config` - ClickHouse configuration -/// * `clickhouse` - Client handle for database operations -/// -/// # Returns -/// * `Result` - Number of rows in the table -/// -/// # Details -/// - Uses COUNT(*) for accurate row count -/// - Properly escapes table and database names -/// - Handles empty tables correctly -/// -/// # Example -/// ```rust -/// let size = check_table_size("users_1_0_0", &config, &mut client).await?; -/// println!("Table has {} rows", size); -/// ``` -pub async fn check_table_size( - table_name: &str, - config: &ClickHouseConfig, - clickhouse: &mut ClientHandle, -) -> Result { - info!("Checking size of {} table", table_name); - let result = clickhouse - .query(&format!( - "select count(*) from \"{}\".\"{}\"", - config.db_name.clone(), - table_name - )) - .fetch_all() - .await?; - let rows = result.rows().collect_vec(); - - let result: u64 = match rows.len() { - 1 => rows[0].get(0)?, - _ => panic!("Expected 1 result, got {:?}", rows.len()), - }; - Ok(result as i64) -} - pub struct TableWithUnsupportedType { pub database: String, pub name: String, @@ -1393,7 +1520,7 @@ fn parse_column_metadata(comment: &str) -> Option { match serde_json::from_str::(json_str) { Ok(metadata) => Some(metadata), Err(e) => { - log::warn!("Failed to parse column metadata JSON: {}", e); + tracing::warn!("Failed to parse column metadata JSON: {}", e); None } } @@ -1503,8 +1630,12 @@ impl OlapOperations for ConfiguredDBClient { 
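// Editor's sketch (not part of the patch): a rough approximation of the before/after settings
// comparison behind the MODIFY SETTING / RESET SETTING split logged a few hunks above.
// The real implementation may handle edge cases differently; names and values are illustrative.
use std::collections::HashMap;

fn split_settings(
    before: &HashMap<String, String>,
    after: &HashMap<String, String>,
) -> (HashMap<String, String>, Vec<String>) {
    // Settings present in `after` with a new or changed value get MODIFY SETTING.
    let to_modify: HashMap<String, String> = after
        .iter()
        .filter(|(k, v)| before.get(*k) != Some(*v))
        .map(|(k, v)| (k.clone(), v.clone()))
        .collect();
    // Settings that disappeared from `after` get RESET SETTING.
    let to_reset: Vec<String> = before
        .keys()
        .filter(|k| !after.contains_key(*k))
        .cloned()
        .collect();
    (to_modify, to_reset)
}

fn main() {
    let before = HashMap::from([
        ("index_granularity".to_string(), "8192".to_string()),
        ("ttl_only_drop_parts".to_string(), "1".to_string()),
    ]);
    let after = HashMap::from([("index_granularity".to_string(), "4096".to_string())]);
    let (modify, reset) = split_settings(&before, &after);
    assert_eq!(modify.get("index_granularity").map(String::as_str), Some("4096"));
    assert_eq!(reset, vec!["ttl_only_drop_parts".to_string()]);
}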
let order_by_cols = extract_order_by_from_create_query(&create_query); debug!("Extracted ORDER BY columns: {:?}", order_by_cols); + // Extract PRIMARY KEY expression if present + let primary_key_expr = extract_primary_key_from_create_table(&create_query); + debug!("Extracted PRIMARY KEY expression: {:?}", primary_key_expr); + // Check if the CREATE TABLE statement has an explicit PRIMARY KEY clause - let has_explicit_primary_key = create_query.to_uppercase().contains("PRIMARY KEY"); + let has_explicit_primary_key = primary_key_expr.is_some(); debug!( "Table {} has explicit PRIMARY KEY: {}", table_name, has_explicit_primary_key @@ -1696,6 +1827,100 @@ impl OlapOperations for ConfiguredDBClient { debug!("Found {} columns for table {}", columns.len(), table_name); + // Determine if we should use primary_key_expression or column-level primary_key flags + // Strategy: Build the expected PRIMARY KEY from columns, then compare with extracted PRIMARY KEY + // If they match, use column-level flags; otherwise use primary_key_expression + let (final_columns, final_primary_key_expression) = + if let Some(pk_expr) = &primary_key_expr { + // Build expected PRIMARY KEY expression from columns marked as primary_key=true + let primary_key_columns: Vec = columns + .iter() + .filter(|c| c.primary_key) + .map(|c| c.name.clone()) + .collect(); + + debug!("Columns marked as primary key: {:?}", primary_key_columns); + + // Build expected expression: single column = "col", multiple = "(col1, col2)" + let expected_pk_expr = if primary_key_columns.is_empty() { + String::new() + } else if primary_key_columns.len() == 1 { + primary_key_columns[0].clone() + } else { + format!("({})", primary_key_columns.join(", ")) + }; + + debug!("Expected PRIMARY KEY expression: '{}'", expected_pk_expr); + debug!("Extracted PRIMARY KEY expression: '{}'", pk_expr); + + // Normalize both expressions for comparison (same logic as Table::normalized_primary_key_expr) + let normalize = |s: &str| -> String { + // Step 1: trim, remove backticks, remove spaces + let mut normalized = + s.trim().trim_matches('`').replace('`', "").replace(" ", ""); + + // Step 2: Strip outer parentheses if this is a single-element tuple + // E.g., "(col)" -> "col", "(cityHash64(col))" -> "cityHash64(col)" + // But keep "(col1,col2)" as-is + if normalized.starts_with('(') && normalized.ends_with(')') { + // Check if there are any top-level commas (not inside nested parentheses) + let inner = &normalized[1..normalized.len() - 1]; + let has_top_level_comma = { + let mut depth = 0; + let mut found_comma = false; + for ch in inner.chars() { + match ch { + '(' => depth += 1, + ')' => depth -= 1, + ',' if depth == 0 => { + found_comma = true; + break; + } + _ => {} + } + } + found_comma + }; + + // If no top-level comma, it's a single-element tuple - strip outer parens + if !has_top_level_comma { + normalized = inner.to_string(); + } + } + + normalized + }; + + let normalized_expected = normalize(&expected_pk_expr); + let normalized_extracted = normalize(pk_expr); + + debug!( + "Normalized expected: '{}', normalized extracted: '{}'", + normalized_expected, normalized_extracted + ); + + if normalized_expected == normalized_extracted { + // PRIMARY KEY matches what columns indicate, use column-level flags + debug!("PRIMARY KEY matches columns, using column-level primary_key flags"); + (columns, None) + } else { + // PRIMARY KEY differs (different order, expressions, etc.), use primary_key_expression + debug!("PRIMARY KEY differs from columns, using 
primary_key_expression"); + let updated_columns: Vec = columns + .into_iter() + .map(|mut c| { + c.primary_key = false; + c + }) + .collect(); + (updated_columns, Some(pk_expr.clone())) + } + } else { + // No PRIMARY KEY clause, use column-level flags as-is + debug!("No PRIMARY KEY clause, using column-level primary_key flags"); + (columns, None) + }; + // Extract base name and version for source primitive let (base_name, version) = extract_version_from_table_name(&table_name); @@ -1709,19 +1934,34 @@ impl OlapOperations for ConfiguredDBClient { // This is more reliable than using the system.tables engine column which // only contains the engine name without parameters (e.g., "S3Queue" instead of // "S3Queue('path', 'format', ...)") - let engine_parsed = if let Some(engine_def) = + let engine_str_to_parse = if let Some(engine_def) = extract_engine_from_create_table(&create_query) { - // Try to parse the extracted engine definition - engine_def.as_str().try_into().ok() + engine_def } else { // Fallback to the simple engine name from system.tables debug!("Could not extract engine from CREATE TABLE query, falling back to system.tables engine column"); - engine.as_str().try_into().ok() + engine.clone() }; - let engine_params_hash = engine_parsed - .as_ref() - .map(|e: &ClickhouseEngine| e.non_alterable_params_hash()); + + // Try to parse the engine string + let engine_parsed: ClickhouseEngine = match engine_str_to_parse.as_str().try_into() { + Ok(engine) => engine, + Err(failed_str) => { + warn!( + "Failed to parse engine for table '{}': '{}'. This may indicate an unsupported engine type.", + table_name, failed_str + ); + unsupported_tables.push(TableWithUnsupportedType { + database: database.clone(), + name: table_name.clone(), + col_name: "__engine".to_string(), + col_type: String::from(failed_str), + }); + continue 'table_loop; + } + }; + let engine_params_hash = Some(engine_parsed.non_alterable_params_hash()); // Extract table settings from CREATE TABLE query let table_settings = extract_table_settings_from_create_table(&create_query); @@ -1747,7 +1987,7 @@ impl OlapOperations for ConfiguredDBClient { let table = Table { // keep the name with version suffix, following PartialInfrastructureMap.convert_tables name: table_name, - columns, + columns: final_columns, order_by: OrderBy::Fields(order_by_cols), // Use the extracted ORDER BY columns partition_by: { let p = partition_key.trim(); @@ -1765,6 +2005,11 @@ impl OlapOperations for ConfiguredDBClient { indexes, database: Some(database), table_ttl_setting, + // cluster_name is always None from introspection because ClickHouse doesn't store + // the ON CLUSTER clause - it's only used during DDL execution and isn't persisted + // in system tables. Users must manually specify cluster in their table configs. + cluster_name: None, + primary_key_expression: final_primary_key_expression, }; debug!("Created table object: {:?}", table); @@ -1777,6 +2022,267 @@ impl OlapOperations for ConfiguredDBClient { ); Ok((tables, unsupported_tables)) } + + /// Retrieves all SQL resources (views and materialized views) from the ClickHouse database + /// + /// # Arguments + /// * `db_name` - The name of the database to list SQL resources from + /// * `default_database` - The default database name for resolving unqualified table references + /// + /// # Returns + /// * `Result, OlapChangesError>` - A list of SqlResource objects + /// + /// # Details + /// This implementation: + /// 1. Queries system.tables for views and materialized views + /// 2. 
Parses the CREATE statements to extract dependencies + /// 3. Reconstructs SqlResource objects with setup and teardown scripts + /// 4. Extracts data lineage (pulls_data_from and pushes_data_to) + async fn list_sql_resources( + &self, + db_name: &str, + default_database: &str, + ) -> Result, OlapChangesError> { + debug!( + "Starting list_sql_resources operation for database: {}", + db_name + ); + + // We query `as_select` from system.tables to get the clean SELECT statement + // without the view's column definitions (e.g., `CREATE VIEW v (col1 Type) AS ...`). + // This avoids complex parsing logic to strip those columns manually. + let query = format!( + r#" + SELECT + name, + database, + engine, + create_table_query, + as_select + FROM system.tables + WHERE database = '{}' + AND engine IN ('View', 'MaterializedView') + AND NOT name LIKE '.%' + ORDER BY name + "#, + db_name + ); + debug!("Executing SQL resources query: {}", query); + + let mut cursor = self + .client + .query(&query) + .fetch::<(String, String, String, String, String)>() + .map_err(|e| { + debug!("Error fetching SQL resources: {}", e); + OlapChangesError::DatabaseError(e.to_string()) + })?; + + let mut sql_resources = Vec::new(); + + while let Some((name, database, engine, create_query, as_select)) = cursor + .next() + .await + .map_err(|e| OlapChangesError::DatabaseError(e.to_string()))? + { + debug!("Processing SQL resource: {} (engine: {})", name, engine); + debug!("Create query: {}", create_query); + + // Reconstruct SqlResource based on engine type + let sql_resource = match engine.as_str() { + "MaterializedView" => reconstruct_sql_resource_from_mv( + name, + create_query, + as_select, + database, + default_database, + )?, + "View" => { + reconstruct_sql_resource_from_view(name, as_select, database, default_database)? 
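// Editor's sketch (not part of the patch): the primary-key reconciliation described earlier in
// this hunk, reduced to its decision step. Returning None means the column-level flags already
// describe the extracted PRIMARY KEY; Some(expr) means the flags are cleared and the expression
// kept. `normalize` here is a simplified stand-in for the fuller normalization shown above.
fn reconcile_primary_key(flagged_columns: &[&str], extracted_expr: Option<&str>) -> Option<String> {
    let extracted = extracted_expr?;
    let expected = match flagged_columns {
        [] => String::new(),
        [single] => (*single).to_string(),
        many => format!("({})", many.join(", ")),
    };
    let normalize = |s: &str| s.replace('`', "").replace(' ', "");
    if normalize(&expected) == normalize(extracted) {
        None // matches the column flags, keep using them
    } else {
        Some(extracted.to_string()) // keep the expression, clear the flags
    }
}

fn main() {
    assert_eq!(reconcile_primary_key(&["id", "timestamp"], Some("(id, timestamp)")), None);
    assert_eq!(
        reconcile_primary_key(&["id", "timestamp"], Some("(timestamp, id)")),
        Some("(timestamp, id)".to_string())
    );
}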
+ } + _ => { + warn!("Unexpected engine type for SQL resource: {}", engine); + continue; + } + }; + + sql_resources.push(sql_resource); + } + + debug!( + "Completed list_sql_resources operation, found {} SQL resources", + sql_resources.len() + ); + Ok(sql_resources) + } +} + +static MATERIALIZED_VIEW_TO_PATTERN: LazyLock = LazyLock::new(|| { + // Pattern to extract TO from CREATE MATERIALIZED VIEW + regex::Regex::new(r"(?i)\bTO\s+([a-zA-Z0-9_.`]+)") + .expect("MATERIALIZED_VIEW_TO_PATTERN regex should compile") +}); + +/// Reconstructs a SqlResource from a materialized view's CREATE statement +/// +/// # Arguments +/// * `name` - The name of the materialized view +/// * `create_query` - The CREATE MATERIALIZED VIEW statement from ClickHouse +/// * `as_select` - The SELECT part of the query (clean, from system.tables) +/// * `database` - The database where the view is located +/// * `default_database` - The default database for resolving unqualified table references +/// +/// # Returns +/// * `Result` - The reconstructed SqlResource +fn reconstruct_sql_resource_from_mv( + name: String, + create_query: String, + as_select: String, + database: String, + default_database: &str, +) -> Result { + // Extract target table from create_query for MV + let target_table = MATERIALIZED_VIEW_TO_PATTERN + .captures(&create_query) + .and_then(|caps| caps.get(1)) + .map(|m| m.as_str().replace('`', "")) + .ok_or_else(|| { + OlapChangesError::DatabaseError(format!( + "Could not find TO target in materialized view definition: {}", + name + )) + })?; + + // Extract pushes_data_to (target table for MV) + let (target_base_name, _version) = extract_version_from_table_name(&target_table); + let (target_db, target_name_only) = split_qualified_name(&target_base_name); + + let target_qualified_id = if let Some(target_db) = target_db { + if target_db == default_database { + target_name_only + } else { + format!("{}_{}", target_db, target_name_only) + } + } else { + target_name_only + }; + + let pushes_data_to = vec![InfrastructureSignature::Table { + id: target_qualified_id, + }]; + + // Reconstruct with MV-specific CREATE statement + let setup_raw = format!( + "CREATE MATERIALIZED VIEW IF NOT EXISTS {} TO {} AS {}", + name, target_table, as_select + ); + + reconstruct_sql_resource_common( + name, + setup_raw, + as_select, + database, + default_database, + pushes_data_to, + ) +} + +/// Reconstructs a SqlResource from a view's CREATE statement +/// +/// # Arguments +/// * `name` - The name of the view +/// * `as_select` - The SELECT part of the query (clean, from system.tables) +/// * `database` - The database where the view is located +/// * `default_database` - The default database for resolving unqualified table references +/// +/// # Returns +/// * `Result` - The reconstructed SqlResource +fn reconstruct_sql_resource_from_view( + name: String, + as_select: String, + database: String, + default_database: &str, +) -> Result { + // Views don't push data to tables + let pushes_data_to = vec![]; + + // Reconstruct with view-specific CREATE statement + let setup_raw = format!("CREATE VIEW IF NOT EXISTS {} AS {}", name, as_select); + + reconstruct_sql_resource_common( + name, + setup_raw, + as_select, + database, + default_database, + pushes_data_to, + ) +} + +/// Common logic for reconstructing SqlResource from MV or View +fn reconstruct_sql_resource_common( + name: String, + setup_raw: String, + as_select: String, + database: String, + default_database: &str, + pushes_data_to: Vec, +) -> Result { + // Normalize the SQL 
for consistent comparison + let setup = normalize_sql_for_comparison(&setup_raw, default_database); + + // Generate teardown script + let teardown = format!("DROP VIEW IF EXISTS `{}`", name); + + // Parse as_select to get source tables (lineage) + // Try standard SQL parser first, but fall back to regex if it fails + let source_tables = match extract_source_tables_from_query(&as_select) { + Ok(tables) => tables, + Err(e) => { + warn!( + "Could not parse {} query with standard SQL parser ({}), using regex fallback", + name, e + ); + extract_source_tables_from_query_regex(&as_select, default_database).map_err(|e| { + OlapChangesError::DatabaseError(format!( + "Failed to extract source tables from {} using regex fallback: {}", + name, e + )) + })? + } + }; + + // Extract pulls_data_from (source tables) + let pulls_data_from = source_tables + .into_iter() + .map(|table_ref| { + // Get the table name, strip version suffix if present + let table_name = table_ref.table; + let (base_name, _version) = extract_version_from_table_name(&table_name); + + // Use database from table reference if available, otherwise use default + let qualified_id = if let Some(db) = table_ref.database { + if db == default_database { + base_name + } else { + format!("{}_{}", db, base_name) + } + } else { + base_name + }; + + InfrastructureSignature::Table { id: qualified_id } + }) + .collect(); + + Ok(SqlResource { + name, + database: Some(database), + setup: vec![setup], + teardown: vec![teardown], + pulls_data_from, + pushes_data_to, + }) } /// Regex pattern to find keywords that terminate an ORDER BY clause @@ -2283,7 +2789,7 @@ SETTINGS enable_mixed_granularity_parts = 1, index_granularity = 8192, index_gra }; let ch_after = std_column_to_clickhouse_column(after_column).unwrap(); - let sqls = build_modify_column_sql("db", "table", &ch_after, false, false).unwrap(); + let sqls = build_modify_column_sql("db", "table", &ch_after, false, false, None).unwrap(); assert_eq!(sqls.len(), 1); assert_eq!( @@ -2315,8 +2821,8 @@ SETTINGS enable_mixed_granularity_parts = 1, index_granularity = 8192, index_gra }; // Use the pure SQL builder for comment-only update - let sql = - build_modify_column_comment_sql("db", "table", &after_column.name, "new").unwrap(); + let sql = build_modify_column_comment_sql("db", "table", &after_column.name, "new", None) + .unwrap(); assert_eq!( sql, "ALTER TABLE `db`.`table` MODIFY COLUMN `status` COMMENT 'new'" @@ -2344,7 +2850,8 @@ SETTINGS enable_mixed_granularity_parts = 1, index_granularity = 8192, index_gra let clickhouse_column = std_column_to_clickhouse_column(column).unwrap(); let sqls = - build_modify_column_sql("test_db", "users", &clickhouse_column, false, false).unwrap(); + build_modify_column_sql("test_db", "users", &clickhouse_column, false, false, None) + .unwrap(); assert_eq!(sqls.len(), 1); assert_eq!( @@ -2370,8 +2877,15 @@ SETTINGS enable_mixed_granularity_parts = 1, index_granularity = 8192, index_gra ttl: None, }; - let sqls = build_modify_column_sql("test_db", "test_table", &sample_hash_col, false, false) - .unwrap(); + let sqls = build_modify_column_sql( + "test_db", + "test_table", + &sample_hash_col, + false, + false, + None, + ) + .unwrap(); assert_eq!(sqls.len(), 1); // The fix ensures xxHash64(_id) is NOT quoted - if it were quoted, ClickHouse would treat it as a string literal @@ -2392,8 +2906,9 @@ SETTINGS enable_mixed_granularity_parts = 1, index_granularity = 8192, index_gra ttl: None, }; - let sqls = build_modify_column_sql("test_db", "test_table", &created_at_col, 
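// Editor's sketch (not part of the patch): the qualified-id rule used for lineage in
// reconstruct_sql_resource_common above - the database prefix is dropped when it equals the
// default database, otherwise joined with an underscore. The function name is hypothetical.
fn qualified_id(db: Option<&str>, table: &str, default_db: &str) -> String {
    match db {
        Some(d) if d == default_db => table.to_string(),
        Some(d) => format!("{}_{}", d, table),
        None => table.to_string(),
    }
}

fn main() {
    assert_eq!(qualified_id(Some("mydb"), "source_table", "mydb"), "source_table");
    assert_eq!(qualified_id(Some("other"), "source_table", "mydb"), "other_source_table");
    assert_eq!(qualified_id(None, "source_table", "mydb"), "source_table");
}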
false, false) - .unwrap(); + let sqls = + build_modify_column_sql("test_db", "test_table", &created_at_col, false, false, None) + .unwrap(); assert_eq!(sqls.len(), 1); // The fix ensures now() is NOT quoted @@ -2415,7 +2930,8 @@ SETTINGS enable_mixed_granularity_parts = 1, index_granularity = 8192, index_gra }; let sqls = - build_modify_column_sql("test_db", "test_table", &status_col, false, false).unwrap(); + build_modify_column_sql("test_db", "test_table", &status_col, false, false, None) + .unwrap(); assert_eq!(sqls.len(), 1); // String literals should preserve their quotes @@ -2487,6 +3003,62 @@ SETTINGS enable_mixed_granularity_parts = 1, index_granularity = 8192, index_gra ); } + #[test] + fn test_primary_key_normalization_single_element_tuple() { + // Test that "(id)" and "id" normalize to the same value + // This is the bug fix: single-element tuples should have outer parens stripped + let normalize = |s: &str| -> String { + let mut normalized = s.trim().trim_matches('`').replace('`', "").replace(" ", ""); + + if normalized.starts_with('(') && normalized.ends_with(')') { + let inner = &normalized[1..normalized.len() - 1]; + let has_top_level_comma = { + let mut depth = 0; + let mut found_comma = false; + for ch in inner.chars() { + match ch { + '(' => depth += 1, + ')' => depth -= 1, + ',' if depth == 0 => { + found_comma = true; + break; + } + _ => {} + } + } + found_comma + }; + + if !has_top_level_comma { + normalized = inner.to_string(); + } + } + + normalized + }; + + // Single element: "(id)" should normalize to "id" + assert_eq!(normalize("(id)"), "id"); + assert_eq!(normalize("id"), "id"); + assert_eq!(normalize("(id)"), normalize("id")); + + // Single element with function: "(cityHash64(id))" should normalize to "cityHash64(id)" + assert_eq!(normalize("(cityHash64(id))"), "cityHash64(id)"); + assert_eq!(normalize("cityHash64(id)"), "cityHash64(id)"); + assert_eq!(normalize("(cityHash64(id))"), normalize("cityHash64(id)")); + + // Multiple elements: "(id, ts)" should stay as "(id,ts)" (with spaces removed) + assert_eq!(normalize("(id, ts)"), "(id,ts)"); + assert_eq!(normalize("(id,ts)"), "(id,ts)"); + + // Multiple elements with functions: should keep parens + assert_eq!(normalize("(id, cityHash64(ts))"), "(id,cityHash64(ts))"); + + // Backticks should be removed + assert_eq!(normalize("(`id`)"), "id"); + assert_eq!(normalize("(` id `)"), "id"); + } + #[test] fn test_normalize_ttl_expression() { // Test DAY conversion @@ -2727,7 +3299,7 @@ SETTINGS enable_mixed_granularity_parts = 1, index_granularity = 8192, index_gra order_by: OrderBy::Fields(vec!["id".to_string()]), partition_by: Some("toYYYYMM(created_at)".to_string()), sample_by: None, - engine: None, + engine: ClickhouseEngine::MergeTree, version: None, source_primitive: PrimitiveSignature { name: "Test".to_string(), @@ -2739,7 +3311,9 @@ SETTINGS enable_mixed_granularity_parts = 1, index_granularity = 8192, index_gra table_settings: None, indexes: vec![], database: None, + cluster_name: None, table_ttl_setting: Some("created_at + INTERVAL 30 DAY".to_string()), + primary_key_expression: None, }; let ignore_ops = vec![ @@ -2792,7 +3366,7 @@ SETTINGS enable_mixed_granularity_parts = 1, index_granularity = 8192, index_gra order_by: OrderBy::Fields(vec!["id".to_string()]), partition_by: Some("toYYYYMM(created_at)".to_string()), sample_by: None, - engine: None, + engine: ClickhouseEngine::MergeTree, version: None, source_primitive: PrimitiveSignature { name: "Test".to_string(), @@ -2804,7 +3378,9 @@ SETTINGS 
enable_mixed_granularity_parts = 1, index_granularity = 8192, index_gra table_settings: None, indexes: vec![], database: None, + cluster_name: None, table_ttl_setting: Some("created_at + INTERVAL 30 DAY".to_string()), + primary_key_expression: None, }; let ignore_ops = vec![]; @@ -2824,4 +3400,111 @@ SETTINGS enable_mixed_granularity_parts = 1, index_granularity = 8192, index_gra "Column TTL should remain unchanged" ); } + + #[test] + fn test_reconstruct_sql_resource_from_mv_with_standard_sql() { + let create_query = + "CREATE MATERIALIZED VIEW test_mv TO target_table AS SELECT id FROM source".to_string(); + let as_select = "SELECT id FROM source".to_string(); + + let result = reconstruct_sql_resource_from_mv( + "test_mv".to_string(), + create_query, + as_select, + "mydb".to_string(), + "mydb", + ) + .unwrap(); + + assert_eq!(result.name, "test_mv"); + assert_eq!(result.pulls_data_from.len(), 1); + assert_eq!(result.pushes_data_to.len(), 1); + match &result.pushes_data_to[0] { + InfrastructureSignature::Table { id } => assert_eq!(id, "target_table"), + _ => panic!("Expected Table signature"), + } + } + + #[test] + fn test_reconstruct_sql_resource_from_mv_with_clickhouse_array_syntax() { + // Reproduces customer issue: MV with ClickHouse array literals + let create_query = + "CREATE MATERIALIZED VIEW test_mv TO target AS SELECT * FROM source".to_string(); + let as_select = r#" + SELECT name, count() as total + FROM mydb.source_table + WHERE arrayExists(x -> (lower(name) LIKE x), ['pattern1', 'pattern2']) + AND status NOT IN ['active', 'pending'] + GROUP BY name + "# + .to_string(); + + // Should not panic, should use regex fallback + let result = reconstruct_sql_resource_from_mv( + "test_mv".to_string(), + create_query, + as_select, + "mydb".to_string(), + "mydb", + ) + .unwrap(); + + assert_eq!(result.name, "test_mv"); + // Regex fallback should extract source_table + assert_eq!(result.pulls_data_from.len(), 1); + match &result.pulls_data_from[0] { + InfrastructureSignature::Table { id } => assert_eq!(id, "source_table"), + _ => panic!("Expected Table signature"), + } + } + + #[test] + fn test_reconstruct_sql_resource_from_view_with_clickhouse_array_syntax() { + let as_select = r#" + SELECT id, name + FROM db1.table1 + WHERE status IN ['active', 'pending'] + "# + .to_string(); + + // Should not panic, should use regex fallback + let result = reconstruct_sql_resource_from_view( + "test_view".to_string(), + as_select, + "db1".to_string(), + "db1", + ) + .unwrap(); + + assert_eq!(result.name, "test_view"); + assert_eq!(result.pulls_data_from.len(), 1); + match &result.pulls_data_from[0] { + InfrastructureSignature::Table { id } => assert_eq!(id, "table1"), + _ => panic!("Expected Table signature"), + } + assert_eq!(result.pushes_data_to.len(), 0); + } + + #[test] + fn test_reconstruct_sql_resource_from_mv_strips_backticks_from_target() { + // Tests the backtick stripping fix in target table extraction + let create_query = + "CREATE MATERIALIZED VIEW mv TO `my_db`.`my_target` AS SELECT * FROM src".to_string(); + let as_select = "SELECT * FROM src".to_string(); + + let result = reconstruct_sql_resource_from_mv( + "mv".to_string(), + create_query, + as_select, + "my_db".to_string(), + "my_db", + ) + .unwrap(); + + // Target table name should have backticks stripped + match &result.pushes_data_to[0] { + InfrastructureSignature::Table { id } => assert_eq!(id, "my_target"), + _ => panic!("Expected Table signature"), + } + } } diff --git 
a/apps/framework-cli/src/infrastructure/olap/clickhouse/model.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/model.rs index a51e6c656f..d21ca45082 100644 --- a/apps/framework-cli/src/infrastructure/olap/clickhouse/model.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/model.rs @@ -657,6 +657,10 @@ pub struct ClickHouseTable { pub indexes: Vec, /// Optional TTL expression at table level (without leading 'TTL') pub table_ttl_setting: Option, + /// Optional cluster name for ON CLUSTER support + pub cluster_name: Option, + /// Optional PRIMARY KEY expression (overrides column-level primary_key flags when specified) + pub primary_key_expression: Option, } impl ClickHouseTable { diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse/queries.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/queries.rs index 7bd972a70f..5a2c736a2b 100644 --- a/apps/framework-cli/src/infrastructure/olap/clickhouse/queries.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/queries.rs @@ -1,8 +1,8 @@ use handlebars::{no_escape, Handlebars}; -use log::info; use serde::{Deserialize, Serialize}; use serde_json::{json, Value}; use sha2::{Digest, Sha256}; +use tracing::info; use super::errors::ClickhouseError; use super::model::ClickHouseColumn; @@ -121,7 +121,8 @@ pub fn create_alias_for_table( } static CREATE_TABLE_TEMPLATE: &str = r#" -CREATE TABLE IF NOT EXISTS `{{db_name}}`.`{{table_name}}` +CREATE TABLE IF NOT EXISTS `{{db_name}}`.`{{table_name}}`{{#if cluster_name}} +ON CLUSTER {{cluster_name}}{{/if}} ( {{#each fields}} `{{field_name}}` {{{field_type}}} {{field_nullable}}{{#if field_default}} DEFAULT {{{field_default}}}{{/if}}{{#if field_comment}} COMMENT '{{{field_comment}}}'{{/if}}{{#if field_ttl}} TTL {{{field_ttl}}}{{/if}}{{#unless @last}}, {{/unless}}{{/each}}{{#if has_indexes}}, {{#each indexes}}{{this}}{{#unless @last}}, {{/unless}}{{/each}}{{/if}} @@ -134,9 +135,104 @@ ORDER BY ({{order_by_string}}){{/if}}{{#if ttl_clause}} TTL {{ttl_clause}}{{/if}}{{#if settings}} SETTINGS {{settings}}{{/if}}"#; +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Default)] +pub struct BufferEngine { + // Target database name + pub target_database: String, + // Target table name + pub target_table: String, + // Number of buffer layers (typically 16) + pub num_layers: u32, + // Minimum time in seconds before flushing + pub min_time: u32, + // Maximum time in seconds before flushing + pub max_time: u32, + // Minimum number of rows before flushing + pub min_rows: u64, + // Maximum number of rows before flushing + pub max_rows: u64, + // Minimum bytes before flushing + pub min_bytes: u64, + // Maximum bytes before flushing + pub max_bytes: u64, + // Optional flush time + pub flush_time: Option, + // Optional flush rows + pub flush_rows: Option, + // Optional flush bytes + pub flush_bytes: Option, +} + +impl BufferEngine { + /// Helper function to append nested optional flush parameters for Buffer engine + /// Returns comma-separated string of flush parameters that are present + /// Validates nested optional constraint: flush_rows requires flush_time, flush_bytes requires both + fn append_buffer_flush_params( + flush_time: &Option, + flush_rows: &Option, + flush_bytes: &Option, + ) -> String { + // Warn about invalid combinations (but serialize what we can) + if flush_rows.is_some() && flush_time.is_none() { + tracing::warn!( + "Buffer engine has flush_rows but no flush_time - flush_rows will be ignored. \ + This violates ClickHouse nested optional constraint." 
+ ); + } + if flush_bytes.is_some() && (flush_time.is_none() || flush_rows.is_none()) { + tracing::warn!( + "Buffer engine has flush_bytes but missing flush_time or flush_rows - flush_bytes will be ignored. \ + This violates ClickHouse nested optional constraint." + ); + } + + let mut params = String::new(); + if let Some(ft) = flush_time { + params.push_str(&format!(", {}", ft)); + + if let Some(fr) = flush_rows { + params.push_str(&format!(", {}", fr)); + + if let Some(fb) = flush_bytes { + params.push_str(&format!(", {}", fb)); + } + } + } + params + } + + /// Serialize Buffer engine to string format for proto storage + /// Format: Buffer('database', 'table', num_layers, min_time, max_time, min_rows, max_rows, min_bytes, max_bytes[, flush_time[, flush_rows[, flush_bytes]]]) + /// Note: flush parameters are nested optionals - you cannot skip earlier parameters + fn build_string(&self) -> String { + let mut result = format!( + "Buffer('{}', '{}', {}, {}, {}, {}, {}, {}, {}", + self.target_database, + self.target_table, + self.num_layers, + self.min_time, + self.max_time, + self.min_rows, + self.max_rows, + self.min_bytes, + self.max_bytes + ); + + result.push_str(&Self::append_buffer_flush_params( + &self.flush_time, + &self.flush_rows, + &self.flush_bytes, + )); + result.push(')'); + result + } +} + #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] #[allow(clippy::large_enum_variant)] // S3Queue has many fields, but this is acceptable for our use case +#[derive(Default)] pub enum ClickhouseEngine { + #[default] MergeTree, ReplacingMergeTree { // Optional version column for deduplication @@ -213,32 +309,7 @@ pub enum ClickhouseEngine { // Partition columns in data file partition_columns_in_data_file: Option, }, - Buffer { - // Target database name - target_database: String, - // Target table name - target_table: String, - // Number of buffer layers (typically 16) - num_layers: u32, - // Minimum time in seconds before flushing - min_time: u32, - // Maximum time in seconds before flushing - max_time: u32, - // Minimum number of rows before flushing - min_rows: u64, - // Maximum number of rows before flushing - max_rows: u64, - // Minimum bytes before flushing - min_bytes: u64, - // Maximum bytes before flushing - max_bytes: u64, - // Optional flush time - flush_time: Option, - // Optional flush rows - flush_rows: Option, - // Optional flush bytes - flush_bytes: Option, - }, + Buffer(BufferEngine), Distributed { // Cluster name from ClickHouse configuration cluster: String, @@ -251,6 +322,18 @@ pub enum ClickhouseEngine { // Optional policy name policy_name: Option, }, + IcebergS3 { + // S3 path to Iceberg table root + path: String, + // Data format (Parquet or ORC) + format: String, + // AWS access key ID (optional, None for NOSIGN) + aws_access_key_id: Option, + // AWS secret access key (optional) + aws_secret_access_key: Option, + // Compression type (optional: gzip, zstd, etc.) 
+ compression: Option, + }, } // The implementation is not symetric between TryFrom and Into so we @@ -326,33 +409,7 @@ impl Into for ClickhouseEngine { &partition_strategy, &partition_columns_in_data_file, ), - ClickhouseEngine::Buffer { - target_database, - target_table, - num_layers, - min_time, - max_time, - min_rows, - max_rows, - min_bytes, - max_bytes, - flush_time, - flush_rows, - flush_bytes, - } => Self::serialize_buffer( - &target_database, - &target_table, - num_layers, - min_time, - max_time, - min_rows, - max_rows, - min_bytes, - max_bytes, - &flush_time, - &flush_rows, - &flush_bytes, - ), + ClickhouseEngine::Buffer(buffer_engine) => buffer_engine.build_string(), ClickhouseEngine::Distributed { cluster, target_database, @@ -366,6 +423,22 @@ impl Into for ClickhouseEngine { &sharding_key, &policy_name, ), + ClickhouseEngine::IcebergS3 { + path, + format, + aws_access_key_id, + aws_secret_access_key, + compression, + } => Self::serialize_icebergs3_for_display( + &path, + &format, + &aws_access_key_id, + &aws_secret_access_key, + &compression, + ), + // this might sound obvious, but when you edit this function + // please check if you have changed the parsing side (try_from) as well + // especially if you're an LLM } } } @@ -742,6 +815,9 @@ impl ClickhouseEngine { } s if s.starts_with("S3Queue(") => Self::parse_regular_s3queue(s, value), s if s.starts_with("S3(") => Self::parse_regular_s3(s, value), + s if s.starts_with("Buffer(") => Self::parse_regular_buffer(s, value), + s if s.starts_with("Distributed(") => Self::parse_regular_distributed(s, value), + s if s.starts_with("Iceberg(") => Self::parse_regular_icebergs3(s, value), _ => Err(value), } } @@ -788,6 +864,104 @@ impl ClickhouseEngine { } } + /// Parse regular Iceberg with parameters + fn parse_regular_icebergs3<'a>( + engine_name: &str, + original_value: &'a str, + ) -> Result { + if let Some(content) = engine_name + .strip_prefix("Iceberg(") + .and_then(|s| s.strip_suffix(")")) + { + Self::parse_icebergs3(content).map_err(|_| original_value) + } else { + Err(original_value) + } + } + + /// Parse Iceberg engine content + /// Format: Iceberg('path', [NOSIGN | 'key', 'secret'], 'format'[, 'compression']) + /// or simplified: Iceberg('path', 'format'[, 'compression']) + fn parse_icebergs3(content: &str) -> Result { + let parts = parse_quoted_csv(content); + + if parts.len() < 2 { + return Err("Iceberg requires at least path and format".to_string()); + } + + let path = parts[0].clone(); + + // Parse authentication and format based on ClickHouse IcebergS3 syntax: + // ENGINE = IcebergS3(url, [, NOSIGN | access_key_id, secret_access_key, [session_token]], format, [,compression]) + // + // Possible patterns: + // 1. Iceberg('path', 'format') - no auth + // 2. Iceberg('path', 'format', 'compression') - no auth with compression + // 3. Iceberg('path', NOSIGN, 'format') - explicit NOSIGN + // 4. Iceberg('path', 'access_key_id', 'secret_access_key', 'format') - with credentials + // 5. 
Iceberg('path', 'access_key_id', 'secret_access_key', 'format', 'compression') - with credentials and compression + let (format, aws_access_key_id, aws_secret_access_key, extra_params_start) = if parts.len() + >= 2 + && parts[1].to_uppercase() == "NOSIGN" + { + // NOSIGN keyword (no authentication) - format is at position 2 + if parts.len() < 3 { + return Err("Iceberg with NOSIGN requires format parameter".to_string()); + } + (parts[2].clone(), None, None, 3) + } else if parts.len() >= 2 { + let format_at_pos1 = parts[1].to_uppercase(); + let is_pos1_format = format_at_pos1 == "PARQUET" || format_at_pos1 == "ORC"; + + if is_pos1_format { + // Format is at position 1, no credentials + (parts[1].clone(), None, None, 2) + } else if parts.len() >= 4 && !parts[1].is_empty() && !parts[2].is_empty() { + // Check if parts[3] is a format (credentials case) + let format_at_pos3 = parts[3].to_uppercase(); + if format_at_pos3 == "PARQUET" || format_at_pos3 == "ORC" { + // parts[1] and parts[2] are credentials, format at position 3 + ( + parts[3].clone(), + Some(parts[1].clone()), + Some(parts[2].clone()), + 4, + ) + } else { + // Ambiguous case - neither pos1 nor pos3 is a valid format + return Err(format!( + "Invalid Iceberg format. Expected 'Parquet' or 'ORC' at position 2 or 4, got '{}' and '{}'", + parts[1], parts[3] + )); + } + } else { + // Not enough parts for credentials, but parts[1] is not a valid format + return Err(format!( + "Invalid Iceberg format '{}'. Must be 'Parquet' or 'ORC'", + parts[1] + )); + } + } else { + return Err("Iceberg requires at least path and format parameters".to_string()); + }; + + // Parse optional compression (next parameter after format) + let compression = if parts.len() > extra_params_start && parts[extra_params_start] != "null" + { + Some(parts[extra_params_start].clone()) + } else { + None + }; + + Ok(ClickhouseEngine::IcebergS3 { + path, + format, + aws_access_key_id, + aws_secret_access_key, + compression, + }) + } + /// Parse regular SummingMergeTree with parameters fn parse_regular_summing_merge_tree<'a>( engine_name: &str, @@ -802,6 +976,119 @@ impl ClickhouseEngine { Err(original_value) } } + + /// Parse regular Buffer with parameters + /// Format: Buffer('db', 'table', num_layers, min_time, max_time, min_rows, max_rows, min_bytes, max_bytes[, flush_time][, flush_rows][, flush_bytes]) + fn parse_regular_buffer<'a>( + engine_name: &str, + original_value: &'a str, + ) -> Result { + if let Some(content) = engine_name + .strip_prefix("Buffer(") + .and_then(|s| s.strip_suffix(")")) + { + let params = parse_quoted_csv(content); + + // Need at least 9 parameters (database, table, and 7 numeric values) + if params.len() < 9 { + return Err(original_value); + } + + // Parse required parameters + let target_database = params[0].clone(); + let target_table = params[1].clone(); + let num_layers = params[2].parse::().map_err(|_| original_value)?; + let min_time = params[3].parse::().map_err(|_| original_value)?; + let max_time = params[4].parse::().map_err(|_| original_value)?; + let min_rows = params[5].parse::().map_err(|_| original_value)?; + let max_rows = params[6].parse::().map_err(|_| original_value)?; + let min_bytes = params[7].parse::().map_err(|_| original_value)?; + let max_bytes = params[8].parse::().map_err(|_| original_value)?; + + // Parse optional parameters (flush_time, flush_rows, flush_bytes) + let flush_time = if params.len() > 9 { + Some(params[9].parse::().map_err(|_| original_value)?) 
+ } else { + None + }; + + let flush_rows = if params.len() > 10 { + Some(params[10].parse::().map_err(|_| original_value)?) + } else { + None + }; + + let flush_bytes = if params.len() > 11 { + Some(params[11].parse::().map_err(|_| original_value)?) + } else { + None + }; + + Ok(ClickhouseEngine::Buffer(BufferEngine { + target_database, + target_table, + num_layers, + min_time, + max_time, + min_rows, + max_rows, + min_bytes, + max_bytes, + flush_time, + flush_rows, + flush_bytes, + })) + } else { + Err(original_value) + } + } + + /// Parse regular Distributed with parameters + /// Format: Distributed('cluster', 'database', 'table'[, sharding_key][, 'policy']) + fn parse_regular_distributed<'a>( + engine_name: &str, + original_value: &'a str, + ) -> Result { + if let Some(content) = engine_name + .strip_prefix("Distributed(") + .and_then(|s| s.strip_suffix(")")) + { + let params = parse_quoted_csv(content); + + // Need at least 3 parameters (cluster, database, table) + if params.len() < 3 { + return Err(original_value); + } + + let cluster = params[0].clone(); + let target_database = params[1].clone(); + let target_table = params[2].clone(); + + // Parse optional sharding_key (4th parameter, not quoted - it's an expression) + let sharding_key = if params.len() > 3 { + Some(params[3].clone()) + } else { + None + }; + + // Parse optional policy_name (5th parameter, quoted) + let policy_name = if params.len() > 4 { + Some(params[4].clone()) + } else { + None + }; + + Ok(ClickhouseEngine::Distributed { + cluster, + target_database, + target_table, + sharding_key, + policy_name, + }) + } else { + Err(original_value) + } + } } /// Parse comma-separated values from a string @@ -940,33 +1227,7 @@ impl ClickhouseEngine { partition_strategy, partition_columns_in_data_file, ), - ClickhouseEngine::Buffer { - target_database, - target_table, - num_layers, - min_time, - max_time, - min_rows, - max_rows, - min_bytes, - max_bytes, - flush_time, - flush_rows, - flush_bytes, - } => Self::serialize_buffer_proto( - target_database, - target_table, - *num_layers, - *min_time, - *max_time, - *min_rows, - *max_rows, - *min_bytes, - *max_bytes, - flush_time, - flush_rows, - flush_bytes, - ), + ClickhouseEngine::Buffer(buffer_engine) => buffer_engine.build_string(), ClickhouseEngine::Distributed { cluster, target_database, @@ -980,6 +1241,16 @@ impl ClickhouseEngine { sharding_key, policy_name, ), + ClickhouseEngine::IcebergS3 { + path, + format, + compression, + .. 
// Omit credentials for protobuf + } => Self::serialize_icebergs3( + path, + format, + compression, + ), } } @@ -996,7 +1267,7 @@ impl ClickhouseEngine { if ver.is_some() { params.push(format!("'{}'", d)); } else { - log::warn!("is_deleted requires ver to be specified, this was not caught by the validation"); + tracing::warn!("is_deleted requires ver to be specified, this was not caught by the validation"); } } if !params.is_empty() { @@ -1119,49 +1390,35 @@ impl ClickhouseEngine { result } - /// Serialize Buffer engine to string format for proto storage - /// Format: Buffer('database', 'table', num_layers, min_time, max_time, min_rows, max_rows, min_bytes, max_bytes[, flush_time, flush_rows, flush_bytes]) - #[allow(clippy::too_many_arguments)] - fn serialize_buffer_proto( - target_database: &str, - target_table: &str, - num_layers: u32, - min_time: u32, - max_time: u32, - min_rows: u64, - max_rows: u64, - min_bytes: u64, - max_bytes: u64, - flush_time: &Option, - flush_rows: &Option, - flush_bytes: &Option, + /// Helper function to append nested optional parameters for Distributed engine + /// Returns comma-separated string of parameters that are present + /// Validates nested optional constraint: policy_name requires sharding_key + fn append_distributed_optional_params( + sharding_key: &Option, + policy_name: &Option, + quote_policy: bool, ) -> String { - let mut result = format!( - "Buffer('{}', '{}', {}, {}, {}, {}, {}, {}, {}", - target_database, - target_table, - num_layers, - min_time, - max_time, - min_rows, - max_rows, - min_bytes, - max_bytes - ); - - // Add optional flush parameters - if let Some(ft) = flush_time { - result.push_str(&format!(", {}", ft)); - } - if let Some(fr) = flush_rows { - result.push_str(&format!(", {}", fr)); - } - if let Some(fb) = flush_bytes { - result.push_str(&format!(", {}", fb)); + // Warn about invalid combination + if policy_name.is_some() && sharding_key.is_none() { + tracing::warn!( + "Distributed engine has policy_name but no sharding_key - policy_name will be ignored. \ + This violates ClickHouse nested optional constraint." 
+ ); } - result.push(')'); - result + let mut params = String::new(); + if let Some(key) = sharding_key { + params.push_str(&format!(", {}", key)); // Expression, not quoted + + if let Some(policy) = policy_name { + if quote_policy { + params.push_str(&format!(", '{}'", policy)); + } else { + params.push_str(&format!(", {}", policy)); + } + } + } + params } /// Serialize Distributed engine to string format for proto storage @@ -1178,16 +1435,11 @@ impl ClickhouseEngine { cluster, target_database, target_table ); - // Add sharding key if present - if let Some(key) = sharding_key { - result.push_str(&format!(", {}", key)); - } - - // Add policy name if present - if let Some(policy) = policy_name { - result.push_str(&format!(", '{}'", policy)); - } - + result.push_str(&Self::append_distributed_optional_params( + sharding_key, + policy_name, + true, + )); result.push(')'); result } @@ -1275,55 +1527,57 @@ impl ClickhouseEngine { result } - /// Serialize Buffer engine to string format - /// Format: Buffer('database', 'table', num_layers, min_time, max_time, min_rows, max_rows, min_bytes, max_bytes[, flush_time, flush_rows, flush_bytes]) - #[allow(clippy::too_many_arguments)] - fn serialize_buffer( - target_database: &str, - target_table: &str, - num_layers: u32, - min_time: u32, - max_time: u32, - min_rows: u64, - max_rows: u64, - min_bytes: u64, - max_bytes: u64, - flush_time: &Option, - flush_rows: &Option, - flush_bytes: &Option, + /// Serialize Iceberg engine to string format for display (with masked credentials) + /// Format: Iceberg('url', [NOSIGN | 'access_key_id', 'secret_access_key'], 'format'[, 'compression']) + fn serialize_icebergs3_for_display( + path: &str, + format: &str, + aws_access_key_id: &Option, + aws_secret_access_key: &Option, + compression: &Option, ) -> String { - let mut result = format!( - "Buffer('{}', '{}', {}, {}, {}, {}, {}, {}, {}", - target_database, - target_table, - num_layers, - min_time, - max_time, - min_rows, - max_rows, - min_bytes, - max_bytes - ); + let mut result = format!("Iceberg('{}'", path); - // Add optional flush parameters if any are present - if flush_time.is_some() || flush_rows.is_some() || flush_bytes.is_some() { - if let Some(ft) = flush_time { - result.push_str(&format!(", {}", ft)); - } - if let Some(fr) = flush_rows { - result.push_str(&format!(", {}", fr)); + // Add authentication info for display - uses shared masking logic + match (aws_access_key_id, aws_secret_access_key) { + (Some(key_id), Some(secret)) => { + let masked_secret = Self::mask_secret(secret); + result.push_str(&format!(", '{}', '{}'", key_id, masked_secret)); } - if let Some(fb) = flush_bytes { - result.push_str(&format!(", {}", fb)); + _ => { + // No credentials provided - using NOSIGN for public buckets or IAM roles + result.push_str(", NOSIGN"); } } + // Add format + result.push_str(&format!(", '{}'", format)); + + // Add compression if present + if let Some(comp) = compression { + result.push_str(&format!(", '{}'", comp)); + } + + result.push(')'); + result + } + + /// Serialize Iceberg engine to string format for proto storage (without credentials) + /// Format: Iceberg('url', 'format'[, 'compression']) + fn serialize_icebergs3(path: &str, format: &str, compression: &Option) -> String { + let mut result = format!("Iceberg('{}', '{}'", path, format); + + // Add compression if present + if let Some(comp) = compression { + result.push_str(&format!(", '{}'", comp)); + } + result.push(')'); result } /// Serialize Distributed engine to string format - /// Format: 
Distributed('cluster', 'database', 'table'[, 'sharding_key'][, 'policy_name']) + /// Format: Distributed('cluster', 'database', 'table'[, sharding_key][, 'policy_name']) fn serialize_distributed( cluster: &str, target_database: &str, @@ -1336,16 +1590,11 @@ impl ClickhouseEngine { cluster, target_database, target_table ); - // Add sharding key if present - if let Some(key) = sharding_key { - result.push_str(&format!(", {}", key)); // Don't quote - it's an expression - } - - // Add policy name if present - if let Some(policy) = policy_name { - result.push_str(&format!(", '{}'", policy)); - } - + result.push_str(&Self::append_distributed_optional_params( + sharding_key, + policy_name, + true, + )); result.push(')'); result } @@ -1887,7 +2136,7 @@ impl ClickhouseEngine { hasher.update("null".as_bytes()); } } - ClickhouseEngine::Buffer { + ClickhouseEngine::Buffer(BufferEngine { target_database, target_table, num_layers, @@ -1900,7 +2149,7 @@ impl ClickhouseEngine { flush_time, flush_rows, flush_bytes, - } => { + }) => { hasher.update("Buffer".as_bytes()); hasher.update(target_database.as_bytes()); hasher.update(target_table.as_bytes()); @@ -1953,6 +2202,36 @@ impl ClickhouseEngine { hasher.update("null".as_bytes()); } } + ClickhouseEngine::IcebergS3 { + path, + format, + aws_access_key_id, + aws_secret_access_key, + compression, + } => { + hasher.update("Iceberg".as_bytes()); + hasher.update(path.as_bytes()); + hasher.update(format.as_bytes()); + + // Hash credentials (consistent with S3 and S3Queue engines) + if let Some(key_id) = aws_access_key_id { + hasher.update(key_id.as_bytes()); + } else { + hasher.update("null".as_bytes()); + } + if let Some(secret) = aws_secret_access_key { + hasher.update(secret.as_bytes()); + } else { + hasher.update("null".as_bytes()); + } + + // Hash optional parameters + if let Some(comp) = compression { + hasher.update(comp.as_bytes()); + } else { + hasher.update("null".as_bytes()); + } + } } format!("{:x}", hasher.finalize()) @@ -2011,12 +2290,14 @@ fn build_summing_merge_tree_ddl(columns: &Option>) -> String { /// Build replication parameters for replicated engines /// /// When keeper_path and replica_name are None: -/// - In dev mode: Injects default parameters for local development using a static table name hash -/// - In production: Returns empty parameters to let ClickHouse use automatic configuration -/// (ClickHouse Cloud or server-configured defaults) +/// - Dev without cluster: Injects table_name-based paths (ON CLUSTER absent, {uuid} won't work) +/// - Dev with cluster: Returns empty params (ON CLUSTER present, ClickHouse uses {uuid}) +/// - Prod with cluster: Returns empty params (ON CLUSTER present, ClickHouse uses {uuid}) +/// - Prod without cluster: Returns empty params (ClickHouse Cloud handles defaults) fn build_replication_params( keeper_path: &Option, replica_name: &Option, + cluster_name: &Option, engine_name: &str, table_name: &str, is_dev: bool, @@ -2026,18 +2307,20 @@ fn build_replication_params( Ok(vec![format!("'{}'", path), format!("'{}'", name)]) } (None, None) => { - if is_dev { - // In dev mode, inject default parameters for local ClickHouse - // Use table name to ensure unique paths per table, avoiding conflicts + // The {uuid} macro only works with ON CLUSTER queries + // Only dev without cluster needs explicit params + if is_dev && cluster_name.is_none() { + // Dev mode without cluster: inject table_name-based paths // {shard}, {replica}, and {database} macros are configured in docker-compose - // Note: {uuid} macro only works 
with ON CLUSTER queries, so we use table name instead Ok(vec![ format!("'/clickhouse/tables/{{database}}/{{shard}}/{}'", table_name), "'{replica}'".to_string(), ]) } else { - // In production, return empty parameters - let ClickHouse handle defaults - // This works for ClickHouse Cloud and properly configured servers + // All other cases: return empty parameters + // - Dev with cluster: ON CLUSTER present → ClickHouse uses {uuid} + // - Prod with cluster: ON CLUSTER present → ClickHouse uses {uuid} + // - Prod without cluster: ClickHouse Cloud handles defaults Ok(vec![]) } } @@ -2054,12 +2337,14 @@ fn build_replication_params( fn build_replicated_merge_tree_ddl( keeper_path: &Option, replica_name: &Option, + cluster_name: &Option, table_name: &str, is_dev: bool, ) -> Result { let params = build_replication_params( keeper_path, replica_name, + cluster_name, "ReplicatedMergeTree", table_name, is_dev, @@ -2068,9 +2353,11 @@ fn build_replicated_merge_tree_ddl( } /// Generate DDL for ReplicatedReplacingMergeTree engine +#[allow(clippy::too_many_arguments)] fn build_replicated_replacing_merge_tree_ddl( keeper_path: &Option, replica_name: &Option, + cluster_name: &Option, ver: &Option, is_deleted: &Option, order_by_empty: bool, @@ -2093,6 +2380,7 @@ fn build_replicated_replacing_merge_tree_ddl( let mut params = build_replication_params( keeper_path, replica_name, + cluster_name, "ReplicatedReplacingMergeTree", table_name, is_dev, @@ -2115,12 +2403,14 @@ fn build_replicated_replacing_merge_tree_ddl( fn build_replicated_aggregating_merge_tree_ddl( keeper_path: &Option, replica_name: &Option, + cluster_name: &Option, table_name: &str, is_dev: bool, ) -> Result { let params = build_replication_params( keeper_path, replica_name, + cluster_name, "ReplicatedAggregatingMergeTree", table_name, is_dev, @@ -2135,6 +2425,7 @@ fn build_replicated_aggregating_merge_tree_ddl( fn build_replicated_summing_merge_tree_ddl( keeper_path: &Option, replica_name: &Option, + cluster_name: &Option, columns: &Option>, table_name: &str, is_dev: bool, @@ -2142,6 +2433,7 @@ fn build_replicated_summing_merge_tree_ddl( let mut params = build_replication_params( keeper_path, replica_name, + cluster_name, "ReplicatedSummingMergeTree", table_name, is_dev, @@ -2181,7 +2473,13 @@ pub fn create_table_query( ClickhouseEngine::ReplicatedMergeTree { keeper_path, replica_name, - } => build_replicated_merge_tree_ddl(keeper_path, replica_name, &table.name, is_dev)?, + } => build_replicated_merge_tree_ddl( + keeper_path, + replica_name, + &table.cluster_name, + &table.name, + is_dev, + )?, ClickhouseEngine::ReplicatedReplacingMergeTree { keeper_path, replica_name, @@ -2190,6 +2488,7 @@ pub fn create_table_query( } => build_replicated_replacing_merge_tree_ddl( keeper_path, replica_name, + &table.cluster_name, ver, is_deleted, table.order_by.is_empty(), @@ -2202,6 +2501,7 @@ pub fn create_table_query( } => build_replicated_aggregating_merge_tree_ddl( keeper_path, replica_name, + &table.cluster_name, &table.name, is_dev, )?, @@ -2212,6 +2512,7 @@ pub fn create_table_query( } => build_replicated_summing_merge_tree_ddl( keeper_path, replica_name, + &table.cluster_name, columns, &table.name, is_dev, @@ -2275,7 +2576,7 @@ pub fn create_table_query( format!("S3({})", engine_parts.join(", ")) } - ClickhouseEngine::Buffer { + ClickhouseEngine::Buffer(BufferEngine { target_database, target_table, num_layers, @@ -2288,7 +2589,21 @@ pub fn create_table_query( flush_time, flush_rows, flush_bytes, - } => { + }) => { + // Warn about invalid combinations + 
if flush_rows.is_some() && flush_time.is_none() { + tracing::warn!( + "Buffer engine has flush_rows but no flush_time - flush_rows will be ignored. \ + This violates ClickHouse nested optional constraint." + ); + } + if flush_bytes.is_some() && (flush_time.is_none() || flush_rows.is_none()) { + tracing::warn!( + "Buffer engine has flush_bytes but missing flush_time or flush_rows - flush_bytes will be ignored. \ + This violates ClickHouse nested optional constraint." + ); + } + let mut engine_parts = vec![ format!("'{}'", target_database), format!("'{}'", target_table), @@ -2301,15 +2616,17 @@ pub fn create_table_query( max_bytes.to_string(), ]; - // Add optional flush parameters + // Add optional flush parameters following nested optional constraint if let Some(ft) = flush_time { engine_parts.push(ft.to_string()); - } - if let Some(fr) = flush_rows { - engine_parts.push(fr.to_string()); - } - if let Some(fb) = flush_bytes { - engine_parts.push(fb.to_string()); + + if let Some(fr) = flush_rows { + engine_parts.push(fr.to_string()); + + if let Some(fb) = flush_bytes { + engine_parts.push(fb.to_string()); + } + } } format!("Buffer({})", engine_parts.join(", ")) @@ -2321,22 +2638,56 @@ pub fn create_table_query( sharding_key, policy_name, } => { + // Warn about invalid combination + if policy_name.is_some() && sharding_key.is_none() { + tracing::warn!( + "Distributed engine has policy_name but no sharding_key - policy_name will be ignored. \ + This violates ClickHouse nested optional constraint." + ); + } + let mut engine_parts = vec![ format!("'{}'", cluster), format!("'{}'", target_database), format!("'{}'", target_table), ]; - // Add optional parameters + // Add optional parameters following nested optional constraint if let Some(key) = sharding_key { engine_parts.push(key.clone()); // Don't quote - it's an expression - } - if let Some(policy) = policy_name { - engine_parts.push(format!("'{}'", policy)); + + if let Some(policy) = policy_name { + engine_parts.push(format!("'{}'", policy)); + } } format!("Distributed({})", engine_parts.join(", ")) } + ClickhouseEngine::IcebergS3 { + path, + format, + aws_access_key_id, + aws_secret_access_key, + compression, + } => { + let mut engine_parts = vec![format!("'{}'", path)]; + + // Handle credentials using shared helper (same as S3Queue) + engine_parts.extend(ClickhouseEngine::format_s3_credentials_for_ddl( + aws_access_key_id, + aws_secret_access_key, + )); + + // Add format + engine_parts.push(format!("'{}'", format)); + + // Add optional compression + if let Some(comp) = compression { + engine_parts.push(format!("'{}'", comp)); + } + + format!("Iceberg({})", engine_parts.join(", ")) + } }; // Format settings from table.table_settings @@ -2359,12 +2710,30 @@ pub fn create_table_query( None }; - let primary_key = table - .columns - .iter() - .filter(|column| column.primary_key) - .map(|column| column.name.clone()) - .collect::>(); + // PRIMARY KEY: use primary_key_expression if specified, otherwise use columns with primary_key flag + let primary_key_str = if let Some(ref expr) = table.primary_key_expression { + // When primary_key_expression is specified, use it directly (ignoring column-level primary_key flags) + // Strip outer parentheses if present, as the template will add them + let trimmed = expr.trim(); + if trimmed.starts_with('(') && trimmed.ends_with(')') { + Some(trimmed[1..trimmed.len() - 1].to_string()) + } else { + Some(trimmed.to_string()) + } + } else { + // Otherwise, use columns with primary_key flag + let primary_key = table + 
.columns + .iter() + .filter(|column| column.primary_key) + .map(|column| column.name.clone()) + .collect::>(); + if !primary_key.is_empty() { + Some(wrap_and_join_column_names(&primary_key, ",")) + } else { + None + } + }; // Prepare indexes strings like: INDEX name expr TYPE type(args...) GRANULARITY n let (has_indexes, index_strings): (bool, Vec) = if table.indexes.is_empty() { @@ -2409,12 +2778,13 @@ pub fn create_table_query( let template_context = json!({ "db_name": db_name, "table_name": table.name, + "cluster_name": table.cluster_name.as_deref(), "fields": builds_field_context(&table.columns)?, "has_fields": !table.columns.is_empty(), "has_indexes": has_indexes, "indexes": index_strings, - "primary_key_string": if supports_primary_key && !primary_key.is_empty() { - Some(wrap_and_join_column_names(&primary_key, ",")) + "primary_key_string": if supports_primary_key { + primary_key_str } else { None }, @@ -2423,7 +2793,18 @@ pub fn create_table_query( OrderBy::Fields(v) if v.len() == 1 && v[0] == "tuple()" => Some("tuple()".to_string()), OrderBy::Fields(v) if v.is_empty() => None, OrderBy::Fields(v) => Some(wrap_and_join_column_names(v, ",")), - OrderBy::SingleExpr(expr) => Some(expr.clone()), + OrderBy::SingleExpr(expr) => { + // Strip outer parentheses if present, as the template will add them + // Exception: keep tuple() as-is since it's a function call + let trimmed = expr.trim(); + if trimmed == "tuple()" { + Some(trimmed.to_string()) + } else if trimmed.starts_with('(') && trimmed.ends_with(')') { + Some(trimmed[1..trimmed.len()-1].to_string()) + } else { + Some(trimmed.to_string()) + } + }, } } else { None @@ -2439,28 +2820,33 @@ pub fn create_table_query( } pub static DROP_TABLE_TEMPLATE: &str = r#" -DROP TABLE IF EXISTS `{{db_name}}`.`{{table_name}}`; +DROP TABLE IF EXISTS `{{db_name}}`.`{{table_name}}`{{#if cluster_name}} ON CLUSTER {{cluster_name}} SYNC{{/if}}; "#; -pub fn drop_table_query(db_name: &str, table_name: &str) -> Result { +pub fn drop_table_query( + db_name: &str, + table_name: &str, + cluster_name: Option<&str>, +) -> Result { let mut reg = Handlebars::new(); reg.register_escape_fn(no_escape); let context = json!({ "db_name": db_name, "table_name": table_name, + "cluster_name": cluster_name, }); Ok(reg.render_template(DROP_TABLE_TEMPLATE, &context)?) } pub static ALTER_TABLE_MODIFY_SETTINGS_TEMPLATE: &str = r#" -ALTER TABLE `{{db_name}}`.`{{table_name}}` +ALTER TABLE `{{db_name}}`.`{{table_name}}`{{#if cluster_name}} ON CLUSTER {{cluster_name}}{{/if}} MODIFY SETTING {{settings}}; "#; pub static ALTER_TABLE_RESET_SETTINGS_TEMPLATE: &str = r#" -ALTER TABLE `{{db_name}}`.`{{table_name}}` +ALTER TABLE `{{db_name}}`.`{{table_name}}`{{#if cluster_name}} ON CLUSTER {{cluster_name}}{{/if}} RESET SETTING {{settings}}; "#; @@ -2469,6 +2855,7 @@ pub fn alter_table_modify_settings_query( db_name: &str, table_name: &str, settings: &std::collections::HashMap, + cluster_name: Option<&str>, ) -> Result { if settings.is_empty() { return Err(ClickhouseError::InvalidParameters { @@ -2495,6 +2882,7 @@ pub fn alter_table_modify_settings_query( "db_name": db_name, "table_name": table_name, "settings": settings_str, + "cluster_name": cluster_name, }); Ok(reg.render_template(ALTER_TABLE_MODIFY_SETTINGS_TEMPLATE, &context)?) 
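// NOTE: illustrative sketch, not part of the patch above. It shows how the new
// `cluster_name: Option<&str>` argument to `alter_table_modify_settings_query` is
// expected to toggle the ON CLUSTER clause, mirroring the tests added further down
// in this file. The settings map is assumed to be `HashMap<String, String>` and the
// return type `Result<String, ClickhouseError>`, matching the surrounding code; the
// exact formatting of the rendered setting values is intentionally not asserted.
#[test]
fn sketch_alter_table_modify_settings_toggles_on_cluster() {
    use std::collections::HashMap;

    let mut settings = HashMap::new();
    settings.insert("ttl_only_drop_parts".to_string(), "1".to_string());

    // With a cluster name, the Handlebars template emits an ON CLUSTER clause.
    let ddl =
        alter_table_modify_settings_query("analytics", "events", &settings, Some("my_cluster"))
            .unwrap();
    assert!(ddl.contains("ALTER TABLE `analytics`.`events` ON CLUSTER my_cluster"));
    assert!(ddl.contains("MODIFY SETTING"));

    // Without a cluster name, the clause is omitted entirely.
    let ddl = alter_table_modify_settings_query("analytics", "events", &settings, None).unwrap();
    assert!(!ddl.contains("ON CLUSTER"));
    assert!(ddl.contains("MODIFY SETTING"));
}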
@@ -2505,6 +2893,7 @@ pub fn alter_table_reset_settings_query( db_name: &str, table_name: &str, setting_names: &[String], + cluster_name: Option<&str>, ) -> Result { if setting_names.is_empty() { return Err(ClickhouseError::InvalidParameters { @@ -2521,6 +2910,7 @@ pub fn alter_table_reset_settings_query( "db_name": db_name, "table_name": table_name, "settings": settings_str, + "cluster_name": cluster_name, }); Ok(reg.render_template(ALTER_TABLE_RESET_SETTINGS_TEMPLATE, &context)?) @@ -2917,6 +3307,8 @@ mod tests { table_settings: None, indexes: vec![], table_ttl_setting: None, + cluster_name: None, + primary_key_expression: None, }; let query = create_table_query("test_db", table, false).unwrap(); @@ -2954,6 +3346,8 @@ PRIMARY KEY (`id`) table_settings: None, indexes: vec![], table_ttl_setting: None, + cluster_name: None, + primary_key_expression: None, }; let query = create_table_query("test_db", table, false).unwrap(); @@ -2990,6 +3384,8 @@ ENGINE = MergeTree table_settings: None, indexes: vec![], table_ttl_setting: None, + cluster_name: None, + primary_key_expression: None, }; let query = create_table_query("test_db", table, false).unwrap(); @@ -3049,6 +3445,8 @@ ENGINE = MergeTree table_settings: None, indexes: vec![], table_ttl_setting: None, + cluster_name: None, + primary_key_expression: None, }; let query = create_table_query("test_db", table, false).unwrap(); @@ -3089,6 +3487,8 @@ ENGINE = MergeTree table_settings: None, indexes: vec![], table_ttl_setting: None, + cluster_name: None, + primary_key_expression: None, }; let query = create_table_query("test_db", table, false).unwrap(); @@ -3128,6 +3528,8 @@ ORDER BY (`id`) "#; table_settings: None, indexes: vec![], table_ttl_setting: None, + cluster_name: None, + primary_key_expression: None, }; let result = create_table_query("test_db", table, false); @@ -3174,6 +3576,8 @@ ORDER BY (`id`) "#; table_settings: None, indexes: vec![], table_ttl_setting: None, + cluster_name: None, + primary_key_expression: None, }; let query = create_table_query("test_db", table, false).unwrap(); @@ -3236,6 +3640,8 @@ ORDER BY (`id`) "#; table_settings: None, indexes: vec![], table_ttl_setting: None, + cluster_name: None, + primary_key_expression: None, }; let query = create_table_query("test_db", table, false).unwrap(); @@ -3277,6 +3683,8 @@ ORDER BY (`id`) "#; table_settings: None, table_ttl_setting: None, indexes: vec![], + cluster_name: None, + primary_key_expression: None, }; let result = create_table_query("test_db", table, false); @@ -3432,6 +3840,8 @@ ORDER BY (`id`) "#; table_settings: None, indexes: vec![], table_ttl_setting: None, + cluster_name: None, + primary_key_expression: None, }; let query = create_table_query("test_db", table, false).unwrap(); @@ -3448,6 +3858,101 @@ ORDER BY (`id`) "#; assert_eq!(query.trim(), expected.trim()); } + #[test] + fn test_create_table_query_with_primary_key_expression() { + let table = ClickHouseTable { + version: Some(Version::from_string("1".to_string())), + name: "test_table".to_string(), + columns: vec![ + ClickHouseColumn { + name: "user_id".to_string(), + column_type: ClickHouseColumnType::String, + required: true, + unique: false, + primary_key: false, // primary_key flag ignored when primary_key_expression is set + default: None, + comment: None, + ttl: None, + }, + ClickHouseColumn { + name: "event_id".to_string(), + column_type: ClickHouseColumnType::String, + required: true, + unique: false, + primary_key: false, + default: None, + comment: None, + ttl: None, + }, + ClickHouseColumn { + name: 
"timestamp".to_string(), + column_type: ClickHouseColumnType::DateTime, + required: true, + unique: false, + primary_key: false, + default: None, + comment: None, + ttl: None, + }, + ], + order_by: OrderBy::SingleExpr("(user_id, cityHash64(event_id), timestamp)".to_string()), + partition_by: None, + sample_by: None, + engine: ClickhouseEngine::MergeTree, + table_settings: None, + indexes: vec![], + table_ttl_setting: None, + cluster_name: None, + primary_key_expression: Some("(user_id, cityHash64(event_id))".to_string()), + }; + + let query = create_table_query("test_db", table, false).unwrap(); + let expected = r#" +CREATE TABLE IF NOT EXISTS `test_db`.`test_table` +( + `user_id` String NOT NULL, + `event_id` String NOT NULL, + `timestamp` DateTime('UTC') NOT NULL +) +ENGINE = MergeTree +PRIMARY KEY (user_id, cityHash64(event_id)) +ORDER BY (user_id, cityHash64(event_id), timestamp)"#; + assert_eq!(query.trim(), expected.trim()); + } + + #[test] + fn test_create_table_query_with_primary_key_expression_no_parens() { + // Test that primary_key_expression works even without outer parentheses + let table = ClickHouseTable { + version: Some(Version::from_string("1".to_string())), + name: "test_table".to_string(), + columns: vec![ClickHouseColumn { + name: "product_id".to_string(), + column_type: ClickHouseColumnType::String, + required: true, + unique: false, + primary_key: false, + default: None, + comment: None, + ttl: None, + }], + order_by: OrderBy::Fields(vec!["product_id".to_string()]), + partition_by: None, + sample_by: None, + engine: ClickhouseEngine::MergeTree, + table_settings: None, + indexes: vec![], + table_ttl_setting: None, + cluster_name: None, + primary_key_expression: Some("product_id".to_string()), + }; + + let query = create_table_query("test_db", table, false).unwrap(); + assert!(query.contains("PRIMARY KEY (product_id)")); + // Should have single parentheses, not double + assert!(!query.contains("PRIMARY KEY ((product_id))")); + } + #[test] fn test_create_table_query_s3queue() { let mut settings = std::collections::HashMap::new(); @@ -3497,6 +4002,8 @@ ORDER BY (`id`) "#; table_settings: Some(settings), indexes: vec![], table_ttl_setting: None, + cluster_name: None, + primary_key_expression: None, }; let query = create_table_query("test_db", table, false).unwrap(); @@ -3969,6 +4476,8 @@ SETTINGS keeper_path = '/clickhouse/s3queue/test_table', mode = 'unordered', s3q table_settings: None, indexes: vec![], table_ttl_setting: None, + cluster_name: None, + primary_key_expression: None, }; let query = create_table_query("test_db", table, false).unwrap(); @@ -4485,4 +4994,1028 @@ ENGINE = S3Queue('s3://my-bucket/data/*.csv', NOSIGN, 'CSV')"#; _ => panic!("Expected ReplacingMergeTree"), } } + + #[test] + fn test_create_table_with_cluster_includes_on_cluster() { + let table = ClickHouseTable { + version: Some(Version::from_string("1".to_string())), + name: "test_table".to_string(), + columns: vec![ClickHouseColumn { + name: "id".to_string(), + column_type: ClickHouseColumnType::ClickhouseInt(ClickHouseInt::Int32), + required: true, + primary_key: true, + unique: false, + default: None, + comment: None, + ttl: None, + }], + order_by: OrderBy::Fields(vec![]), + partition_by: None, + sample_by: None, + engine: ClickhouseEngine::ReplicatedMergeTree { + keeper_path: None, + replica_name: None, + }, + table_settings: None, + indexes: vec![], + table_ttl_setting: None, + cluster_name: Some("test_cluster".to_string()), + primary_key_expression: None, + }; + + let query = 
create_table_query("test_db", table, false).unwrap(); + + // Should include ON CLUSTER clause + assert!( + query.contains("ON CLUSTER test_cluster"), + "Query should contain ON CLUSTER clause" + ); + + // ON CLUSTER should come after CREATE TABLE but before column definitions + let create_idx = query.find("CREATE TABLE").unwrap(); + let on_cluster_idx = query.find("ON CLUSTER").unwrap(); + let engine_idx = query.find("ENGINE").unwrap(); + + assert!( + create_idx < on_cluster_idx && on_cluster_idx < engine_idx, + "ON CLUSTER should be between CREATE TABLE and ENGINE" + ); + } + + #[test] + fn test_create_table_without_cluster_no_on_cluster() { + let table = ClickHouseTable { + version: Some(Version::from_string("1".to_string())), + name: "test_table".to_string(), + columns: vec![ClickHouseColumn { + name: "id".to_string(), + column_type: ClickHouseColumnType::ClickhouseInt(ClickHouseInt::Int32), + required: true, + primary_key: true, + unique: false, + default: None, + comment: None, + ttl: None, + }], + order_by: OrderBy::Fields(vec![]), + partition_by: None, + sample_by: None, + engine: ClickhouseEngine::MergeTree, + table_settings: None, + indexes: vec![], + table_ttl_setting: None, + cluster_name: None, + primary_key_expression: None, + }; + + let query = create_table_query("test_db", table, false).unwrap(); + + // Should NOT include ON CLUSTER clause + assert!( + !query.contains("ON CLUSTER"), + "Query should not contain ON CLUSTER clause when cluster_name is None" + ); + } + + #[test] + fn test_drop_table_with_cluster() { + let cluster_name = Some("test_cluster"); + let query = drop_table_query("test_db", "test_table", cluster_name).unwrap(); + + // Should include ON CLUSTER clause + assert!( + query.contains("ON CLUSTER test_cluster"), + "DROP query should contain ON CLUSTER clause" + ); + + // Should have SYNC (when using ON CLUSTER) + assert!( + query.contains("SYNC"), + "DROP query should contain SYNC with ON CLUSTER" + ); + + // Should have DROP TABLE + assert!(query.contains("DROP TABLE")); + } + + #[test] + fn test_drop_table_without_cluster() { + let cluster_name = None; + let query = drop_table_query("test_db", "test_table", cluster_name).unwrap(); + + // Should NOT include ON CLUSTER clause + assert!( + !query.contains("ON CLUSTER"), + "DROP query should not contain ON CLUSTER clause when cluster_name is None" + ); + + // Should NOT have SYNC (only needed with ON CLUSTER) + assert!( + !query.contains("SYNC"), + "DROP query should not contain SYNC without ON CLUSTER" + ); + + // Should still have DROP TABLE + assert!(query.contains("DROP TABLE")); + } + + #[test] + fn test_alter_table_modify_setting_with_cluster() { + use std::collections::HashMap; + + let mut settings = HashMap::new(); + settings.insert("index_granularity".to_string(), "4096".to_string()); + settings.insert("ttl_only_drop_parts".to_string(), "1".to_string()); + + let query = alter_table_modify_settings_query( + "test_db", + "test_table", + &settings, + Some("test_cluster"), + ) + .unwrap(); + + assert!( + query.contains("ON CLUSTER test_cluster"), + "MODIFY SETTING query should contain ON CLUSTER clause" + ); + assert!(query.contains("ALTER TABLE")); + assert!(query.contains("MODIFY SETTING")); + } + + #[test] + fn test_alter_table_add_column_with_cluster() { + let column = ClickHouseColumn { + name: "new_col".to_string(), + column_type: ClickHouseColumnType::String, + required: false, + primary_key: false, + unique: false, + default: None, + comment: None, + ttl: None, + }; + + let cluster_clause = 
Some("test_cluster") + .map(|c| format!(" ON CLUSTER {}", c)) + .unwrap_or_default(); + + let query = format!( + "ALTER TABLE `test_db`.`test_table`{} ADD COLUMN `{}` String FIRST", + cluster_clause, column.name + ); + + assert!( + query.contains("ON CLUSTER test_cluster"), + "ADD COLUMN query should contain ON CLUSTER clause" + ); + assert!(query.contains("ALTER TABLE")); + assert!(query.contains("ADD COLUMN")); + } + + #[test] + fn test_replication_params_dev_no_cluster_no_keeper_args_auto_injects() { + let result = build_replication_params( + &None, + &None, + &None, + "ReplicatedMergeTree", + "test_table", + true, // is_dev + ); + + assert!(result.is_ok()); + let params = result.unwrap(); + // Should auto-inject params in dev mode + assert_eq!(params.len(), 2); + assert!(params[0].contains("/clickhouse/tables/")); + assert!(params[1].contains("{replica}")); + } + + #[test] + fn test_replication_params_dev_with_cluster_no_keeper_args_succeeds() { + let result = build_replication_params( + &None, + &None, + &Some("test_cluster".to_string()), + "ReplicatedMergeTree", + "test_table", + true, // is_dev + ); + + assert!(result.is_ok()); + let params = result.unwrap(); + // Dev with cluster: should return empty params (let CH use {uuid} with ON CLUSTER) + assert_eq!(params.len(), 0); + } + + #[test] + fn test_replication_params_dev_no_cluster_with_keeper_args_succeeds() { + let result = build_replication_params( + &Some("/clickhouse/tables/{database}/{table}".to_string()), + &Some("{replica}".to_string()), + &None, + "ReplicatedMergeTree", + "test_table", + true, // is_dev + ); + + assert!(result.is_ok()); + let params = result.unwrap(); + assert_eq!(params.len(), 2); + assert_eq!(params[0], "'/clickhouse/tables/{database}/{table}'"); + assert_eq!(params[1], "'{replica}'"); + } + + #[test] + fn test_replication_params_prod_no_cluster_no_keeper_args_succeeds() { + let result = build_replication_params( + &None, + &None, + &None, + "ReplicatedMergeTree", + "test_table", + false, // is_dev = false (production) + ); + + assert!(result.is_ok()); + let params = result.unwrap(); + // Should return empty params for ClickHouse Cloud + assert_eq!(params.len(), 0); + } + + #[test] + fn test_replication_params_dev_with_cluster_and_keeper_args_succeeds() { + let result = build_replication_params( + &Some("/clickhouse/tables/{database}/{table}".to_string()), + &Some("{replica}".to_string()), + &Some("test_cluster".to_string()), + "ReplicatedMergeTree", + "test_table", + true, // is_dev + ); + + assert!(result.is_ok()); + let params = result.unwrap(); + // Should use explicit params, not auto-inject + assert_eq!(params.len(), 2); + assert_eq!(params[0], "'/clickhouse/tables/{database}/{table}'"); + assert_eq!(params[1], "'{replica}'"); + } + + #[test] + fn test_replication_params_prod_with_cluster_no_keeper_args_empty() { + let result = build_replication_params( + &None, + &None, + &Some("test_cluster".to_string()), + "ReplicatedMergeTree", + "test_table", + false, // is_dev = false (production) + ); + + assert!(result.is_ok()); + let params = result.unwrap(); + // Prod with cluster: should return empty params (let CH use {uuid} with ON CLUSTER) + assert_eq!(params.len(), 0); + } + + #[test] + fn test_replication_params_mismatched_keeper_args_fails() { + // Only keeper_path, no replica_name + let result = build_replication_params( + &Some("/clickhouse/tables/{database}/{table}".to_string()), + &None, + &Some("test_cluster".to_string()), + "ReplicatedMergeTree", + "test_table", + true, + ); + + 
assert!(result.is_err()); + let err = result.unwrap_err(); + match err { + ClickhouseError::InvalidParameters { message } => { + assert!(message.contains("requires both keeper_path and replica_name")); + } + _ => panic!("Expected InvalidParameters error"), + } + } + + #[test] + fn test_buffer_engine_round_trip() { + // Test Buffer engine with all parameters + let engine = ClickhouseEngine::Buffer(BufferEngine { + target_database: "db".to_string(), + target_table: "table".to_string(), + num_layers: 16, + min_time: 10, + max_time: 100, + min_rows: 10000, + max_rows: 100000, + min_bytes: 10000000, + max_bytes: 100000000, + flush_time: Some(5), + flush_rows: Some(50000), + flush_bytes: Some(50000000), + }); + + let serialized: String = engine.clone().into(); + assert_eq!( + serialized, + "Buffer('db', 'table', 16, 10, 100, 10000, 100000, 10000000, 100000000, 5, 50000, 50000000)" + ); + + let parsed = ClickhouseEngine::try_from(serialized.as_str()).unwrap(); + match parsed { + ClickhouseEngine::Buffer(BufferEngine { + target_database, + target_table, + num_layers, + min_time, + max_time, + min_rows, + max_rows, + min_bytes, + max_bytes, + flush_time, + flush_rows, + flush_bytes, + }) => { + assert_eq!(target_database, "db"); + assert_eq!(target_table, "table"); + assert_eq!(num_layers, 16); + assert_eq!(min_time, 10); + assert_eq!(max_time, 100); + assert_eq!(min_rows, 10000); + assert_eq!(max_rows, 100000); + assert_eq!(min_bytes, 10000000); + assert_eq!(max_bytes, 100000000); + assert_eq!(flush_time, Some(5)); + assert_eq!(flush_rows, Some(50000)); + assert_eq!(flush_bytes, Some(50000000)); + } + _ => panic!("Expected Buffer engine"), + } + + // Test Buffer engine without optional parameters + let engine2 = ClickhouseEngine::Buffer(BufferEngine { + target_database: "mydb".to_string(), + target_table: "mytable".to_string(), + num_layers: 8, + min_time: 5, + max_time: 50, + min_rows: 5000, + max_rows: 50000, + min_bytes: 5000000, + max_bytes: 50000000, + flush_time: None, + flush_rows: None, + flush_bytes: None, + }); + + let serialized2: String = engine2.clone().into(); + assert_eq!( + serialized2, + "Buffer('mydb', 'mytable', 8, 5, 50, 5000, 50000, 5000000, 50000000)" + ); + + let parsed2 = ClickhouseEngine::try_from(serialized2.as_str()).unwrap(); + match parsed2 { + ClickhouseEngine::Buffer(BufferEngine { + target_database, + target_table, + num_layers, + min_time, + max_time, + min_rows, + max_rows, + min_bytes, + max_bytes, + flush_time, + flush_rows, + flush_bytes, + }) => { + assert_eq!(target_database, "mydb"); + assert_eq!(target_table, "mytable"); + assert_eq!(num_layers, 8); + assert_eq!(min_time, 5); + assert_eq!(max_time, 50); + assert_eq!(min_rows, 5000); + assert_eq!(max_rows, 50000); + assert_eq!(min_bytes, 5000000); + assert_eq!(max_bytes, 50000000); + assert_eq!(flush_time, None); + assert_eq!(flush_rows, None); + assert_eq!(flush_bytes, None); + } + _ => panic!("Expected Buffer engine"), + } + + // Test Buffer engine with only flush_time (nested optional - level 1) + let engine3 = ClickhouseEngine::Buffer(BufferEngine { + target_database: "db3".to_string(), + target_table: "table3".to_string(), + num_layers: 4, + min_time: 1, + max_time: 10, + min_rows: 1000, + max_rows: 10000, + min_bytes: 1000000, + max_bytes: 10000000, + flush_time: Some(3), + flush_rows: None, + flush_bytes: None, + }); + + let serialized3: String = engine3.clone().into(); + assert_eq!( + serialized3, + "Buffer('db3', 'table3', 4, 1, 10, 1000, 10000, 1000000, 10000000, 3)" + ); + + let parsed3 = 
ClickhouseEngine::try_from(serialized3.as_str()).unwrap(); + match parsed3 { + ClickhouseEngine::Buffer(BufferEngine { + target_database, + target_table, + num_layers, + min_time, + max_time, + min_rows, + max_rows, + min_bytes, + max_bytes, + flush_time, + flush_rows, + flush_bytes, + }) => { + assert_eq!(target_database, "db3"); + assert_eq!(target_table, "table3"); + assert_eq!(num_layers, 4); + assert_eq!(min_time, 1); + assert_eq!(max_time, 10); + assert_eq!(min_rows, 1000); + assert_eq!(max_rows, 10000); + assert_eq!(min_bytes, 1000000); + assert_eq!(max_bytes, 10000000); + assert_eq!(flush_time, Some(3)); + assert_eq!(flush_rows, None); + assert_eq!(flush_bytes, None); + } + _ => panic!("Expected Buffer engine"), + } + + // Test Buffer engine with flush_time and flush_rows (nested optional - level 2) + let engine4 = ClickhouseEngine::Buffer(BufferEngine { + target_database: "db4".to_string(), + target_table: "table4".to_string(), + num_layers: 2, + min_time: 2, + max_time: 20, + min_rows: 2000, + max_rows: 20000, + min_bytes: 2000000, + max_bytes: 20000000, + flush_time: Some(7), + flush_rows: Some(15000), + flush_bytes: None, + }); + + let serialized4: String = engine4.clone().into(); + assert_eq!( + serialized4, + "Buffer('db4', 'table4', 2, 2, 20, 2000, 20000, 2000000, 20000000, 7, 15000)" + ); + + let parsed4 = ClickhouseEngine::try_from(serialized4.as_str()).unwrap(); + match parsed4 { + ClickhouseEngine::Buffer(BufferEngine { + target_database, + target_table, + num_layers, + min_time, + max_time, + min_rows, + max_rows, + min_bytes, + max_bytes, + flush_time, + flush_rows, + flush_bytes, + }) => { + assert_eq!(target_database, "db4"); + assert_eq!(target_table, "table4"); + assert_eq!(num_layers, 2); + assert_eq!(min_time, 2); + assert_eq!(max_time, 20); + assert_eq!(min_rows, 2000); + assert_eq!(max_rows, 20000); + assert_eq!(min_bytes, 2000000); + assert_eq!(max_bytes, 20000000); + assert_eq!(flush_time, Some(7)); + assert_eq!(flush_rows, Some(15000)); + assert_eq!(flush_bytes, None); + } + _ => panic!("Expected Buffer engine"), + } + } + + #[test] + fn test_distributed_engine_round_trip() { + // Test Distributed engine with all parameters + let engine = ClickhouseEngine::Distributed { + cluster: "my_cluster".to_string(), + target_database: "db".to_string(), + target_table: "table".to_string(), + sharding_key: Some("cityHash64(user_id)".to_string()), + policy_name: Some("my_policy".to_string()), + }; + + let serialized: String = engine.clone().into(); + assert_eq!( + serialized, + "Distributed('my_cluster', 'db', 'table', cityHash64(user_id), 'my_policy')" + ); + + let parsed = ClickhouseEngine::try_from(serialized.as_str()).unwrap(); + match parsed { + ClickhouseEngine::Distributed { + cluster, + target_database, + target_table, + sharding_key, + policy_name, + } => { + assert_eq!(cluster, "my_cluster"); + assert_eq!(target_database, "db"); + assert_eq!(target_table, "table"); + assert_eq!(sharding_key, Some("cityHash64(user_id)".to_string())); + assert_eq!(policy_name, Some("my_policy".to_string())); + } + _ => panic!("Expected Distributed engine"), + } + + // Test Distributed engine with only required parameters + let engine2 = ClickhouseEngine::Distributed { + cluster: "prod_cluster".to_string(), + target_database: "mydb".to_string(), + target_table: "mytable".to_string(), + sharding_key: None, + policy_name: None, + }; + + let serialized2: String = engine2.clone().into(); + assert_eq!( + serialized2, + "Distributed('prod_cluster', 'mydb', 'mytable')" + ); + + let 
parsed2 = ClickhouseEngine::try_from(serialized2.as_str()).unwrap(); + match parsed2 { + ClickhouseEngine::Distributed { + cluster, + target_database, + target_table, + sharding_key, + policy_name, + } => { + assert_eq!(cluster, "prod_cluster"); + assert_eq!(target_database, "mydb"); + assert_eq!(target_table, "mytable"); + assert_eq!(sharding_key, None); + assert_eq!(policy_name, None); + } + _ => panic!("Expected Distributed engine"), + } + + // Test Distributed engine with sharding key but no policy + let engine3 = ClickhouseEngine::Distributed { + cluster: "test_cluster".to_string(), + target_database: "testdb".to_string(), + target_table: "testtable".to_string(), + sharding_key: Some("rand()".to_string()), + policy_name: None, + }; + + let serialized3: String = engine3.clone().into(); + assert_eq!( + serialized3, + "Distributed('test_cluster', 'testdb', 'testtable', rand())" + ); + + let parsed3 = ClickhouseEngine::try_from(serialized3.as_str()).unwrap(); + match parsed3 { + ClickhouseEngine::Distributed { + cluster, + target_database, + target_table, + sharding_key, + policy_name, + } => { + assert_eq!(cluster, "test_cluster"); + assert_eq!(target_database, "testdb"); + assert_eq!(target_table, "testtable"); + assert_eq!(sharding_key, Some("rand()".to_string())); + assert_eq!(policy_name, None); + } + _ => panic!("Expected Distributed engine"), + } + + // Test edge case: policy_name without sharding_key should be silently dropped + // This matches ClickHouse specification where policy_name requires sharding_key + let engine4 = ClickhouseEngine::Distributed { + cluster: "edge_cluster".to_string(), + target_database: "edgedb".to_string(), + target_table: "edgetable".to_string(), + sharding_key: None, + policy_name: Some("orphan_policy".to_string()), // This should be dropped + }; + + let serialized4: String = engine4.clone().into(); + // policy_name should NOT appear since sharding_key is None + assert_eq!( + serialized4, + "Distributed('edge_cluster', 'edgedb', 'edgetable')" + ); + + // Round-trip should work correctly + let parsed4 = ClickhouseEngine::try_from(serialized4.as_str()).unwrap(); + match parsed4 { + ClickhouseEngine::Distributed { + cluster, + target_database, + target_table, + sharding_key, + policy_name, + } => { + assert_eq!(cluster, "edge_cluster"); + assert_eq!(target_database, "edgedb"); + assert_eq!(target_table, "edgetable"); + assert_eq!(sharding_key, None); + assert_eq!(policy_name, None); // Both should be None after round-trip + } + _ => panic!("Expected Distributed engine"), + } + } + + #[test] + fn test_buffer_invalid_flush_combinations_logged() { + // Test: flush_rows without flush_time - should warn and ignore flush_rows + let engine = ClickhouseEngine::Buffer(BufferEngine { + target_database: "db".to_string(), + target_table: "table".to_string(), + num_layers: 16, + min_time: 10, + max_time: 100, + min_rows: 10000, + max_rows: 100000, + min_bytes: 10000000, + max_bytes: 100000000, + flush_time: None, + flush_rows: Some(50000), // Invalid: no flush_time + flush_bytes: None, + }); + + let serialized: String = engine.clone().into(); + // flush_rows should be ignored, so only required params present + assert_eq!( + serialized, + "Buffer('db', 'table', 16, 10, 100, 10000, 100000, 10000000, 100000000)" + ); + + // Test: flush_bytes without flush_time or flush_rows - should warn and ignore flush_bytes + let engine2 = ClickhouseEngine::Buffer(BufferEngine { + target_database: "db2".to_string(), + target_table: "table2".to_string(), + num_layers: 8, + min_time: 5, 
+ max_time: 50, + min_rows: 5000, + max_rows: 50000, + min_bytes: 5000000, + max_bytes: 50000000, + flush_time: Some(3), + flush_rows: None, + flush_bytes: Some(25000000), // Invalid: no flush_rows + }); + + let serialized2: String = engine2.clone().into(); + // flush_bytes should be ignored, only flush_time present + assert_eq!( + serialized2, + "Buffer('db2', 'table2', 8, 5, 50, 5000, 50000, 5000000, 50000000, 3)" + ); + } + + #[test] + fn test_distributed_invalid_policy_without_sharding_logged() { + // Test: policy_name without sharding_key - should warn and ignore policy_name + let engine = ClickhouseEngine::Distributed { + cluster: "my_cluster".to_string(), + target_database: "db".to_string(), + target_table: "table".to_string(), + sharding_key: None, + policy_name: Some("orphan_policy".to_string()), // Invalid: no sharding_key + }; + + let serialized: String = engine.clone().into(); + // policy_name should be ignored + assert_eq!(serialized, "Distributed('my_cluster', 'db', 'table')"); + + // Verify round-trip works correctly + let parsed = ClickhouseEngine::try_from(serialized.as_str()).unwrap(); + match parsed { + ClickhouseEngine::Distributed { + cluster, + target_database, + target_table, + sharding_key, + policy_name, + } => { + assert_eq!(cluster, "my_cluster"); + assert_eq!(target_database, "db"); + assert_eq!(target_table, "table"); + assert_eq!(sharding_key, None); + assert_eq!(policy_name, None); // Both should be None + } + _ => panic!("Expected Distributed engine"), + } + } + + #[test] + fn test_icebergs3_hash_consistency() { + // Test that identical engines produce identical hashes + let engine1 = ClickhouseEngine::IcebergS3 { + path: "s3://test-bucket/warehouse/table/".to_string(), + format: "Parquet".to_string(), + aws_access_key_id: Some("AKIATEST".to_string()), + aws_secret_access_key: Some("secretkey".to_string()), + compression: Some("gzip".to_string()), + }; + + let engine2 = ClickhouseEngine::IcebergS3 { + path: "s3://test-bucket/warehouse/table/".to_string(), + format: "Parquet".to_string(), + aws_access_key_id: Some("AKIATEST".to_string()), + aws_secret_access_key: Some("secretkey".to_string()), + compression: Some("gzip".to_string()), + }; + + let hash1 = engine1.non_alterable_params_hash(); + let hash2 = engine2.non_alterable_params_hash(); + assert_eq!(hash1, hash2); + assert_eq!(hash1.len(), 64); // SHA256 hex string + + // Test that credential changes produce different hashes + let engine_diff_key = ClickhouseEngine::IcebergS3 { + path: "s3://test-bucket/warehouse/table/".to_string(), + format: "Parquet".to_string(), + aws_access_key_id: Some("AKIADIFFERENT".to_string()), + aws_secret_access_key: Some("secretkey".to_string()), + compression: Some("gzip".to_string()), + }; + let hash_diff_key = engine_diff_key.non_alterable_params_hash(); + assert_ne!( + hash1, hash_diff_key, + "Different access keys should produce different hashes" + ); + + // Test that path changes produce different hashes + let engine_diff_path = ClickhouseEngine::IcebergS3 { + path: "s3://different-bucket/table/".to_string(), + format: "Parquet".to_string(), + aws_access_key_id: Some("AKIATEST".to_string()), + aws_secret_access_key: Some("secretkey".to_string()), + compression: Some("gzip".to_string()), + }; + let hash_diff_path = engine_diff_path.non_alterable_params_hash(); + assert_ne!( + hash1, hash_diff_path, + "Different paths should produce different hashes" + ); + + // Test that compression changes produce different hashes + let engine_no_compression = 
ClickhouseEngine::IcebergS3 { + path: "s3://test-bucket/warehouse/table/".to_string(), + format: "Parquet".to_string(), + aws_access_key_id: Some("AKIATEST".to_string()), + aws_secret_access_key: Some("secretkey".to_string()), + compression: None, + }; + let hash_no_compression = engine_no_compression.non_alterable_params_hash(); + assert_ne!( + hash1, hash_no_compression, + "Different compression should produce different hashes" + ); + + // Test that IcebergS3 hash differs from other engines + let merge_tree = ClickhouseEngine::MergeTree; + assert_ne!(hash1, merge_tree.non_alterable_params_hash()); + } + + #[test] + fn test_icebergs3_display() { + // Test display with credentials + let engine_with_creds = ClickhouseEngine::IcebergS3 { + path: "s3://bucket/warehouse/table/".to_string(), + format: "Parquet".to_string(), + aws_access_key_id: Some("AKIATEST".to_string()), + aws_secret_access_key: Some("secretkey123".to_string()), + compression: Some("gzip".to_string()), + }; + + let display: String = engine_with_creds.clone().into(); + assert!(display.contains("Iceberg")); + assert!(display.contains("s3://bucket/warehouse/table/")); + assert!(display.contains("AKIATEST")); + assert!(display.contains("secr...y123")); // Masked secret (first 4 + ... + last 4) + assert!(display.contains("Parquet")); + assert!(display.contains("gzip")); + + // Test display with NOSIGN + let engine_nosign = ClickhouseEngine::IcebergS3 { + path: "s3://public-bucket/table/".to_string(), + format: "ORC".to_string(), + aws_access_key_id: None, + aws_secret_access_key: None, + compression: None, + }; + + let display_nosign: String = engine_nosign.into(); + assert!(display_nosign.contains("Iceberg")); + assert!(display_nosign.contains("NOSIGN")); + assert!(display_nosign.contains("ORC")); + } + + #[test] + fn test_icebergs3_protobuf_serialization() { + // Test with credentials (should be excluded from proto) + let engine_with_creds = ClickhouseEngine::IcebergS3 { + path: "s3://bucket/table/".to_string(), + format: "Parquet".to_string(), + aws_access_key_id: Some("key".to_string()), + aws_secret_access_key: Some("secret".to_string()), + compression: None, + }; + + let proto = engine_with_creds.to_proto_string(); + assert!(!proto.contains("key")); // Credentials excluded for security + assert!(!proto.contains("secret")); + assert!(proto.contains("s3://bucket/table/")); + assert!(proto.contains("Parquet")); + + // Test with compression (should be included in proto) + let engine_with_compression = ClickhouseEngine::IcebergS3 { + path: "s3://test-bucket/warehouse/events/".to_string(), + format: "ORC".to_string(), + aws_access_key_id: None, + aws_secret_access_key: None, + compression: Some("gzip".to_string()), + }; + + let proto_with_compression = engine_with_compression.to_proto_string(); + assert!(proto_with_compression.contains("s3://test-bucket/warehouse/events/")); + assert!(proto_with_compression.contains("ORC")); + assert!(proto_with_compression.contains("gzip")); // Compression IS included + } + + #[test] + fn test_icebergs3_parsing() { + // Test 1: Simple format without credentials or compression + let simple = "Iceberg('s3://bucket/table/', 'Parquet')"; + let engine = ClickhouseEngine::try_from(simple).unwrap(); + match engine { + ClickhouseEngine::IcebergS3 { + path, + format, + aws_access_key_id, + aws_secret_access_key, + compression, + } => { + assert_eq!(path, "s3://bucket/table/"); + assert_eq!(format, "Parquet"); + assert_eq!(aws_access_key_id, None); + assert_eq!(aws_secret_access_key, None); + 
assert_eq!(compression, None); + } + _ => panic!("Expected IcebergS3 engine"), + } + + // Test 2: With credentials (should be parsed now) + let with_creds = "Iceberg('s3://bucket/table/', 'AKIATEST', '[HIDDEN]', 'Parquet')"; + let engine = ClickhouseEngine::try_from(with_creds).unwrap(); + match engine { + ClickhouseEngine::IcebergS3 { + path, + format, + aws_access_key_id, + aws_secret_access_key, + compression, + } => { + assert_eq!(path, "s3://bucket/table/"); + assert_eq!(format, "Parquet"); + assert_eq!(aws_access_key_id, Some("AKIATEST".to_string())); + assert_eq!(aws_secret_access_key, Some("[HIDDEN]".to_string())); + assert_eq!(compression, None); + } + _ => panic!("Expected IcebergS3 engine"), + } + + // Test 3: With compression but no credentials - format at position 1 + let with_compression = "Iceberg('s3://bucket/table/', 'ORC', 'gzip')"; + let engine = ClickhouseEngine::try_from(with_compression).unwrap(); + match engine { + ClickhouseEngine::IcebergS3 { + path, + format, + compression, + aws_access_key_id, + aws_secret_access_key, + } => { + assert_eq!(path, "s3://bucket/table/"); + assert_eq!(format, "ORC"); + assert_eq!(compression, Some("gzip".to_string())); + assert_eq!(aws_access_key_id, None); + assert_eq!(aws_secret_access_key, None); + } + _ => panic!("Expected IcebergS3 engine"), + } + + // Test 4: Edge case - format name at position 1 with extra params (bug from bot review) + // This tests that we correctly identify format at position 1, not confuse it with credentials + let format_first = + "Iceberg('s3://bucket/table/', 'Parquet', 'extra_param', 'another_param')"; + let engine = ClickhouseEngine::try_from(format_first).unwrap(); + match engine { + ClickhouseEngine::IcebergS3 { + path, + format, + aws_access_key_id, + aws_secret_access_key, + compression, + } => { + assert_eq!(path, "s3://bucket/table/"); + assert_eq!(format, "Parquet"); + assert_eq!(aws_access_key_id, None); + assert_eq!(aws_secret_access_key, None); + // extra_param is treated as compression since it's at position 2 (extra_params_start) + assert_eq!(compression, Some("extra_param".to_string())); + } + _ => panic!("Expected IcebergS3 engine"), + } + + // Test 5: With NOSIGN + let with_nosign = "Iceberg('s3://public-bucket/table/', NOSIGN, 'Parquet')"; + let engine = ClickhouseEngine::try_from(with_nosign).unwrap(); + match engine { + ClickhouseEngine::IcebergS3 { + path, + format, + aws_access_key_id, + aws_secret_access_key, + .. 
+ } => { + assert_eq!(path, "s3://public-bucket/table/"); + assert_eq!(format, "Parquet"); + assert_eq!(aws_access_key_id, None); + assert_eq!(aws_secret_access_key, None); + } + _ => panic!("Expected IcebergS3 engine"), + } + + // Test 6: With credentials AND compression + let full_config = "Iceberg('s3://bucket/table/', 'AKIATEST', 'secret', 'ORC', 'zstd')"; + let engine = ClickhouseEngine::try_from(full_config).unwrap(); + match engine { + ClickhouseEngine::IcebergS3 { + path, + format, + aws_access_key_id, + aws_secret_access_key, + compression, + } => { + assert_eq!(path, "s3://bucket/table/"); + assert_eq!(format, "ORC"); + assert_eq!(aws_access_key_id, Some("AKIATEST".to_string())); + assert_eq!(aws_secret_access_key, Some("secret".to_string())); + assert_eq!(compression, Some("zstd".to_string())); + } + _ => panic!("Expected IcebergS3 engine"), + } + + // Test 7: Invalid format in ambiguous case - should return error + let invalid_format = "Iceberg('s3://bucket/table/', 'InvalidFormat', 'something', 'else')"; + let result = ClickhouseEngine::try_from(invalid_format); + assert!( + result.is_err(), + "Should reject invalid format 'InvalidFormat'" + ); + + // Test 8: Another invalid format edge case + let another_invalid = "Iceberg('s3://bucket/table/', 'BadFormat', 'test')"; + let result2 = ClickhouseEngine::try_from(another_invalid); + assert!(result2.is_err(), "Should reject invalid format 'BadFormat'"); + } } diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse/sql_parser.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/sql_parser.rs index f074294c18..8e2025ef16 100644 --- a/apps/framework-cli/src/infrastructure/olap/clickhouse/sql_parser.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/sql_parser.rs @@ -5,11 +5,14 @@ use crate::infrastructure::olap::clickhouse::model::ClickHouseIndex; use sqlparser::ast::{ - Expr, ObjectName, Query, Select, SelectItem, Statement, TableFactor, TableWithJoins, + Expr, ObjectName, ObjectNamePart, Query, Select, SelectItem, SetExpr, Statement, TableFactor, + TableWithJoins, VisitMut, VisitorMut, }; use sqlparser::dialect::ClickHouseDialect; use sqlparser::parser::Parser; use std::collections::HashSet; +use std::ops::ControlFlow; +use std::sync::LazyLock; #[derive(Debug, Clone, PartialEq)] pub struct MaterializedViewStatement { @@ -247,6 +250,69 @@ pub fn extract_sample_by_from_create_table(sql: &str) -> Option { if let Some(i) = after_upper.find("PRIMARY KEY") { end = end.min(i); } + // Note: Match " TTL" with leading space to avoid matching substrings + // within identifiers (e.g., "cattle" contains "ttl") + if let Some(i) = after_upper.find(" TTL") { + end = end.min(i); + } + + let expr = after[..end].trim(); + if expr.is_empty() { + None + } else { + Some(expr.to_string()) + } +} + +/// Extract PRIMARY KEY expression from a CREATE TABLE statement +/// Returns the raw expression string that follows PRIMARY KEY, trimmed, +/// and stops before ORDER BY, SETTINGS, or end of statement +/// +/// Note: This extracts the PRIMARY KEY clause, which in ClickHouse is used +/// to specify a different primary key than the ORDER BY clause. 
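+///
+/// # Example
+///
+/// A minimal, illustrative call (mirrors the unit tests added further down
+/// in this file):
+///
+/// ```ignore
+/// let sql = "CREATE TABLE t (id UInt64, ts DateTime) ENGINE = MergeTree PRIMARY KEY (id, ts) ORDER BY (id, ts)";
+/// assert_eq!(
+///     extract_primary_key_from_create_table(sql),
+///     Some("(id, ts)".to_string())
+/// );
+/// ```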
+pub fn extract_primary_key_from_create_table(sql: &str) -> Option { + let upper = sql.to_uppercase(); + + // Find PRIMARY KEY that is NOT part of "ORDER BY PRIMARY KEY" + // We need to check that it's a standalone PRIMARY KEY clause + let mut primary_key_pos = None; + for (idx, _) in upper.match_indices("PRIMARY KEY") { + // Check if this is part of ORDER BY by looking at preceding text + let preceding_start = idx.saturating_sub(20); + let preceding = &upper[preceding_start..idx].trim(); + + // If preceded by ORDER BY, this is "ORDER BY PRIMARY KEY", not a standalone PRIMARY KEY + if !preceding.ends_with("ORDER BY") { + primary_key_pos = Some(idx); + break; + } + } + + let pos = primary_key_pos?; + + // After the keyword + let after = &sql[pos + "PRIMARY KEY".len()..]; + let after_upper = after.to_uppercase(); + + // Find earliest terminating keyword after PRIMARY KEY + // Clause order: PRIMARY KEY → PARTITION BY → ORDER BY → SAMPLE BY → SETTINGS → TTL + let mut end = after.len(); + if let Some(i) = after_upper.find("PARTITION BY") { + end = end.min(i); + } + if let Some(i) = after_upper.find("ORDER BY") { + end = end.min(i); + } + if let Some(i) = after_upper.find("SAMPLE BY") { + end = end.min(i); + } + if let Some(i) = after_upper.find(" SETTINGS") { + end = end.min(i); + } + // Note: Match " TTL" with leading space to avoid matching substrings + if let Some(i) = after_upper.find(" TTL") { + end = end.min(i); + } let expr = after[..end].trim(); if expr.is_empty() { @@ -401,6 +467,166 @@ pub fn extract_indexes_from_create_table(sql: &str) -> Result { + default_database: &'a str, +} + +impl<'a> VisitorMut for Normalizer<'a> { + type Break = (); + + fn pre_visit_table_factor( + &mut self, + table_factor: &mut TableFactor, + ) -> ControlFlow { + if let TableFactor::Table { name, .. } = table_factor { + // Strip default database prefix + if name.0.len() == 2 { + if let ObjectNamePart::Identifier(ident) = &name.0[0] { + if ident.value.eq_ignore_ascii_case(self.default_database) { + name.0.remove(0); + } + } + } + // Unquote table names + for part in &mut name.0 { + if let ObjectNamePart::Identifier(ident) = part { + ident.quote_style = None; + ident.value = ident.value.replace('`', ""); + } + } + } + ControlFlow::Continue(()) + } + + fn pre_visit_expr(&mut self, expr: &mut Expr) -> ControlFlow { + match expr { + Expr::Identifier(ident) => { + ident.quote_style = None; + ident.value = ident.value.replace('`', ""); + } + Expr::Function(func) => { + // Uppercase function names (e.g. count -> COUNT) + if let Some(ObjectNamePart::Identifier(ident)) = func.name.0.last_mut() { + let upper = ident.value.to_uppercase(); + if matches!( + upper.as_str(), + "COUNT" + | "SUM" + | "AVG" + | "MIN" + | "MAX" + | "ABS" + | "COALESCE" + | "IF" + | "DISTINCT" + ) { + ident.value = upper; + } + ident.quote_style = None; + ident.value = ident.value.replace('`', ""); + } + } + _ => {} + } + ControlFlow::Continue(()) + } + + fn pre_visit_statement(&mut self, statement: &mut Statement) -> ControlFlow { + if let Statement::CreateView { name, to, .. 
} = statement { + // Strip default database prefix from view name + if name.0.len() == 2 { + if let ObjectNamePart::Identifier(ident) = &name.0[0] { + if ident.value.eq_ignore_ascii_case(self.default_database) { + name.0.remove(0); + } + } + } + + for part in &mut name.0 { + if let ObjectNamePart::Identifier(ident) = part { + ident.quote_style = None; + ident.value = ident.value.replace('`', ""); + } + } + if let Some(to_name) = to { + // Strip default database prefix from TO table + if to_name.0.len() == 2 { + if let ObjectNamePart::Identifier(ident) = &to_name.0[0] { + if ident.value.eq_ignore_ascii_case(self.default_database) { + to_name.0.remove(0); + } + } + } + + for part in &mut to_name.0 { + if let ObjectNamePart::Identifier(ident) = part { + ident.quote_style = None; + ident.value = ident.value.replace('`', ""); + } + } + } + } + ControlFlow::Continue(()) + } + + fn pre_visit_query(&mut self, query: &mut Query) -> ControlFlow { + // Handle SELECT items (including aliases) + if let SetExpr::Select(select) = &mut *query.body { + for item in &mut select.projection { + if let SelectItem::ExprWithAlias { alias, .. } = item { + alias.quote_style = None; + alias.value = alias.value.replace('`', ""); + } + } + } + ControlFlow::Continue(()) + } +} + +pub fn normalize_sql_for_comparison(sql: &str, default_database: &str) -> String { + // 1. Parse with sqlparser (AST-based structural normalization) + // This handles stripping default database prefixes (e.g., `local.Table` -> `Table`) + // and basic unquoting where the parser understands the structure. + let dialect = ClickHouseDialect {}; + let intermediate = match Parser::parse_sql(&dialect, sql) { + Ok(mut ast) => { + if ast.is_empty() { + return sql.trim().to_string(); + } + + // 2. Walk AST to normalize (strip database prefixes, unquote) + let mut normalizer = Normalizer { default_database }; + for statement in &mut ast { + let _ = statement.visit(&mut normalizer); + } + + // 3. 
Convert back to string + ast[0].to_string() + } + Err(_e) => { + // Fallback if parsing fails: rudimentary string replacement + let mut result = sql.to_string(); + if !default_database.is_empty() { + let prefix_pattern = format!("{}.", default_database); + result = result.replace(&prefix_pattern, ""); + } + result + } + }; + + intermediate.trim().to_string() +} + pub fn parse_create_materialized_view( sql: &str, ) -> Result { @@ -442,7 +668,7 @@ pub fn parse_create_materialized_view( let select_statement = format!("{}", query); // Extract source tables from the query - let source_tables = extract_source_tables_from_query(query)?; + let source_tables = extract_source_tables_from_query_ast(query)?; Ok(MaterializedViewStatement { view_name, @@ -478,7 +704,7 @@ pub fn parse_insert_select(sql: &str) -> Result String { format!("{}", name).replace('`', "") } -fn split_qualified_name(name: &str) -> (Option, String) { +pub fn split_qualified_name(name: &str) -> (Option, String) { if let Some(dot_pos) = name.rfind('.') { let database = name[..dot_pos].to_string(); let table = name[dot_pos + 1..].to_string(); @@ -520,7 +746,67 @@ fn split_qualified_name(name: &str) -> (Option, String) { } } -fn extract_source_tables_from_query(query: &Query) -> Result, SqlParseError> { +pub fn extract_source_tables_from_query(sql: &str) -> Result, SqlParseError> { + let dialect = ClickHouseDialect {}; + let ast = Parser::parse_sql(&dialect, sql)?; + + if ast.len() != 1 { + // Should be exactly one query + return Err(SqlParseError::UnsupportedStatement); + } + + if let Statement::Query(query) = &ast[0] { + extract_source_tables_from_query_ast(query) + } else { + Err(SqlParseError::UnsupportedStatement) + } +} + +static FROM_JOIN_TABLE_PATTERN: LazyLock = LazyLock::new(|| { + // Pattern to extract table names from FROM and JOIN clauses + // Matches: FROM schema.table, JOIN schema.table, FROM table, etc. + // Captures optional schema and required table name + regex::Regex::new(r"(?i)\b(?:FROM|JOIN)\s+(?:([a-zA-Z0-9_`]+)\.)?([a-zA-Z0-9_`]+)") + .expect("FROM_JOIN_TABLE_PATTERN regex should compile") +}); + +/// Extracts table names from a SQL query using regex fallback. +/// Used when the standard SQL parser fails (e.g., ClickHouse-specific syntax like array literals). +/// +/// This is a simplified fallback that pattern-matches FROM/JOIN clauses rather than +/// parsing the full AST. It won't catch tables in subqueries, but it's sufficient for +/// basic dependency tracking when full parsing isn't possible. 
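+///
+/// # Example
+///
+/// A minimal, illustrative call (behaviour mirrors the fallback unit tests
+/// below); unqualified table names pick up `default_database`:
+///
+/// ```ignore
+/// let sql = "SELECT name, count() AS total FROM mydb.endpoint_process GROUP BY name";
+/// let tables = extract_source_tables_from_query_regex(sql, "default").unwrap();
+/// assert_eq!(tables[0].table, "endpoint_process");
+/// assert_eq!(tables[0].database, Some("mydb".to_string()));
+/// ```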
+pub fn extract_source_tables_from_query_regex( + sql: &str, + default_database: &str, +) -> Result, SqlParseError> { + let mut tables = Vec::new(); + + for captures in FROM_JOIN_TABLE_PATTERN.captures_iter(sql) { + let database = captures.get(1).map(|m| m.as_str().replace('`', "")); + let table = captures + .get(2) + .map(|m| m.as_str().replace('`', "")) + .ok_or(SqlParseError::UnsupportedStatement)?; + + tables.push(TableReference { + database: database.or_else(|| Some(default_database.to_string())), + table, + alias: None, + }); + } + + if tables.is_empty() { + // No tables found - this might be a problem, but don't fail hard + // The view might have tables in subqueries that regex can't catch + } + + Ok(tables) +} + +fn extract_source_tables_from_query_ast( + query: &Query, +) -> Result, SqlParseError> { let mut tables = HashSet::new(); extract_tables_from_query_recursive(query, &mut tables)?; Ok(tables.into_iter().collect()) @@ -1420,6 +1706,132 @@ pub mod tests { ); } + #[test] + fn test_extract_sample_by_with_ttl_single_line() { + // When parsing CREATE TABLE with both SAMPLE BY and TTL, + // the parser needs to stop at TTL keyword to avoid capturing the TTL expression. + // + // Bug: Parser only checked for ORDER BY, SETTINGS, and PRIMARY KEY as terminators, + // so it extracted "sample_hash TTL toDateTime(...)" instead of just "sample_hash". + // + // This primarily affected tables created outside Moose (not in state storage). + // For Moose-managed tables, the correct value from state storage was used instead. + // Customer reported this when migrating external tables. + let sql = "CREATE TABLE t (id UInt64, ts DateTime) ENGINE = MergeTree ORDER BY (hour_stamp, sample_hash, ts) SAMPLE BY sample_hash TTL toDateTime(ts / 1000) + toIntervalDay(30) SETTINGS index_granularity = 8192"; + assert_eq!( + extract_sample_by_from_create_table(sql), + Some("sample_hash".to_string()) + ); + } + + #[test] + fn test_extract_sample_by_with_identifier_containing_ttl() { + // Edge case: Ensure identifiers containing "ttl" substring don't cause false matches + // "cattle" contains "ttl" when uppercased, but shouldn't be treated as TTL keyword + let sql = "CREATE TABLE t (id UInt64, cattle_count UInt64) ENGINE = MergeTree ORDER BY id SAMPLE BY cattle_count SETTINGS index_granularity = 8192"; + assert_eq!( + extract_sample_by_from_create_table(sql), + Some("cattle_count".to_string()) + ); + } + + // Tests for extract_primary_key_from_create_table + #[test] + fn test_extract_primary_key_simple() { + let sql = r#"CREATE TABLE t (id UInt64, name String) ENGINE = MergeTree PRIMARY KEY id ORDER BY id"#; + assert_eq!( + extract_primary_key_from_create_table(sql), + Some("id".to_string()) + ); + } + + #[test] + fn test_extract_primary_key_tuple() { + let sql = r#"CREATE TABLE t (id UInt64, ts DateTime) ENGINE = MergeTree PRIMARY KEY (id, ts) ORDER BY (id, ts)"#; + assert_eq!( + extract_primary_key_from_create_table(sql), + Some("(id, ts)".to_string()) + ); + } + + #[test] + fn test_extract_primary_key_with_expression() { + let sql = r#"CREATE TABLE t (id UInt64, ts DateTime) ENGINE = MergeTree PRIMARY KEY (id, toYYYYMM(ts)) ORDER BY (id, ts)"#; + assert_eq!( + extract_primary_key_from_create_table(sql), + Some("(id, toYYYYMM(ts))".to_string()) + ); + } + + #[test] + fn test_extract_primary_key_order_by_primary_key() { + // Test that we DON'T extract "ORDER BY PRIMARY KEY" as a PRIMARY KEY clause + let sql = r#"CREATE TABLE t (id UInt64) ENGINE = MergeTree ORDER BY PRIMARY KEY id"#; + 
assert_eq!(extract_primary_key_from_create_table(sql), None); + } + + #[test] + fn test_extract_primary_key_with_settings() { + let sql = r#"CREATE TABLE t (id UInt64, name String) ENGINE = MergeTree PRIMARY KEY id ORDER BY id SETTINGS index_granularity = 8192"#; + assert_eq!( + extract_primary_key_from_create_table(sql), + Some("id".to_string()) + ); + } + + #[test] + fn test_extract_primary_key_no_primary_key() { + let sql = r#"CREATE TABLE t (id UInt64) ENGINE = MergeTree ORDER BY id"#; + assert_eq!(extract_primary_key_from_create_table(sql), None); + } + + #[test] + fn test_extract_primary_key_nested_objects() { + // NESTED_OBJECTS_SQL has "PRIMARY KEY id" + assert_eq!( + extract_primary_key_from_create_table(NESTED_OBJECTS_SQL), + Some("id".to_string()) + ); + } + + #[test] + fn test_extract_primary_key_with_sample_by() { + let sql = r#"CREATE TABLE t (id UInt64, hash UInt64) ENGINE = MergeTree PRIMARY KEY id SAMPLE BY hash ORDER BY (id, hash)"#; + assert_eq!( + extract_primary_key_from_create_table(sql), + Some("id".to_string()) + ); + } + + #[test] + fn test_extract_primary_key_with_ttl() { + let sql = r#"CREATE TABLE t (id UInt64, ts DateTime) ENGINE = MergeTree PRIMARY KEY id ORDER BY id TTL ts + INTERVAL 30 DAY"#; + assert_eq!( + extract_primary_key_from_create_table(sql), + Some("id".to_string()) + ); + } + + #[test] + fn test_extract_primary_key_with_partition_by() { + // Test that PRIMARY KEY stops at PARTITION BY clause + let sql = r#"CREATE TABLE t (id UInt64, ts DateTime) ENGINE = MergeTree PRIMARY KEY id PARTITION BY toYYYYMM(ts) ORDER BY id"#; + assert_eq!( + extract_primary_key_from_create_table(sql), + Some("id".to_string()) + ); + } + + #[test] + fn test_extract_primary_key_tuple_with_partition_by() { + // Test that PRIMARY KEY with tuple stops at PARTITION BY + let sql = r#"CREATE TABLE t (id UInt64, ts DateTime) ENGINE = MergeTree PRIMARY KEY (id, ts) PARTITION BY toYYYYMM(ts) ORDER BY (id, ts)"#; + assert_eq!( + extract_primary_key_from_create_table(sql), + Some("(id, ts)".to_string()) + ); + } + #[test] fn test_extract_indexes_from_create_table_multiple() { let sql = "CREATE TABLE local.table_name (`u64` UInt64, `i32` Int32, `s` String, \ @@ -1523,4 +1935,139 @@ pub mod tests { let indexes = extract_indexes_from_create_table(NESTED_OBJECTS_SQL).unwrap(); assert_eq!(indexes.len(), 0); } + + #[test] + fn test_normalize_sql_removes_backticks() { + let input = "SELECT `column1`, `column2` FROM `table_name`"; + let result = normalize_sql_for_comparison(input, ""); + assert!(!result.contains('`')); + assert!(result.contains("column1")); + assert!(result.contains("table_name")); + } + + #[test] + fn test_normalize_sql_uppercases_keywords() { + let input = "select count(id) as total from users where active = true"; + let result = normalize_sql_for_comparison(input, ""); + assert!(result.contains("SELECT")); + assert!(result.contains("COUNT")); + assert!(result.contains("AS")); + assert!(result.contains("FROM")); + assert!(result.contains("WHERE")); + } + + #[test] + fn test_normalize_sql_collapses_whitespace() { + let input = "SELECT\n col1,\n col2\n FROM\n my_table"; + let result = normalize_sql_for_comparison(input, ""); + assert!(!result.contains('\n')); + assert_eq!(result, "SELECT col1, col2 FROM my_table"); + } + + #[test] + fn test_normalize_sql_removes_database_prefix() { + let input = "SELECT * FROM mydb.table1 JOIN mydb.table2"; + let result = normalize_sql_for_comparison(input, "mydb"); + assert!(!result.contains("mydb.")); + assert!(result.contains("table1")); 
+ assert!(result.contains("table2")); + } + + #[test] + fn test_normalize_sql_comprehensive() { + // Test with all differences at once + let user_sql = "CREATE MATERIALIZED VIEW IF NOT EXISTS `MV`\n TO `Target`\n AS SELECT\n count(`id`) as total\n FROM `Source`"; + let ch_sql = "CREATE MATERIALIZED VIEW IF NOT EXISTS MV TO Target AS SELECT COUNT(id) AS total FROM Source"; + + let normalized_user = normalize_sql_for_comparison(user_sql, ""); + let normalized_ch = normalize_sql_for_comparison(ch_sql, ""); + + assert_eq!(normalized_user, normalized_ch); + } + + #[test] + fn test_normalize_sql_with_database_prefix() { + let user_sql = "CREATE VIEW `MyView` AS SELECT `col` FROM `MyTable`"; + let ch_sql = "CREATE VIEW local.MyView AS SELECT col FROM local.MyTable"; + + let normalized_user = normalize_sql_for_comparison(user_sql, "local"); + let normalized_ch = normalize_sql_for_comparison(ch_sql, "local"); + + assert_eq!(normalized_user, normalized_ch); + } + + #[test] + fn test_normalize_sql_handles_backticks_on_reserved_keyword_aliases() { + // ClickHouse automatically adds backticks around reserved keywords like "table" + let ch_sql = "CREATE MATERIALIZED VIEW mv AS SELECT date, 'value' AS `table` FROM source"; + // User code typically doesn't have backticks + let user_sql = "CREATE MATERIALIZED VIEW mv AS SELECT date, 'value' AS table FROM source"; + + let normalized_ch = normalize_sql_for_comparison(ch_sql, ""); + let normalized_user = normalize_sql_for_comparison(user_sql, ""); + + assert_eq!(normalized_ch, normalized_user); + // Both should normalize to the version without backticks + assert!(normalized_ch.contains("AS table")); + assert!(!normalized_ch.contains("AS `table`")); + } + + #[test] + fn test_extract_source_tables_with_standard_sql() { + let sql = "SELECT a.id, b.name FROM users a JOIN orders b ON a.id = b.user_id"; + let result = extract_source_tables_from_query(sql).unwrap(); + + assert_eq!(result.len(), 2); + let table_names: Vec<&str> = result.iter().map(|t| t.table.as_str()).collect(); + assert!(table_names.contains(&"users")); + assert!(table_names.contains(&"orders")); + } + + #[test] + fn test_extract_source_tables_regex_fallback_with_clickhouse_array_literals() { + // Reproduces customer bug: ClickHouse array literal syntax ['item1', 'item2'] + // causes standard SQL parser to fail at the '[' character. + // This tests the regex fallback successfully extracts tables despite parse failure. 
+ let sql = r#" + SELECT name, count() as total + FROM mydb.endpoint_process + WHERE arrayExists(x -> (lower(name) LIKE x), ['pattern1', 'pattern2']) + AND status NOT IN ['completed', 'failed'] + GROUP BY name + "#; + + // Standard parser should fail on '[' in array literals + let parse_result = extract_source_tables_from_query(sql); + assert!( + parse_result.is_err(), + "Expected parser to fail on ClickHouse array syntax" + ); + + // Regex fallback should succeed and extract the correct table with schema + let result = extract_source_tables_from_query_regex(sql, "default").unwrap(); + + assert_eq!(result.len(), 1); + assert_eq!(result[0].table, "endpoint_process"); + assert_eq!(result[0].database, Some("mydb".to_string())); + } + + #[test] + fn test_extract_source_tables_regex_handles_joins_and_defaults() { + // Tests regex fallback extracts FROM/JOIN tables, handles backticks, + // and applies default database to unqualified names + let sql = "SELECT * FROM `schema1`.`table1` JOIN table2 ON table1.id = table2.id"; + + let result = extract_source_tables_from_query_regex(sql, "default_db").unwrap(); + + assert_eq!(result.len(), 2); + + let tables: Vec<(Option, String)> = result + .iter() + .map(|t| (t.database.clone(), t.table.clone())) + .collect(); + + // table1 has schema, table2 gets default_db + assert!(tables.contains(&(Some("schema1".to_string()), "table1".to_string()))); + assert!(tables.contains(&(Some("default_db".to_string()), "table2".to_string()))); + } } diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse_alt_client.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse_alt_client.rs deleted file mode 100644 index 5267529d33..0000000000 --- a/apps/framework-cli/src/infrastructure/olap/clickhouse_alt_client.rs +++ /dev/null @@ -1,360 +0,0 @@ -/// # ClickHouse Alternative Client Module -/// -/// This module provides an alternative client implementation for interacting with ClickHouse. -/// It focuses on JSON serialization of query results. -/// -/// The module includes functionality for: -/// - Converting ClickHouse data types to JSON -/// - Querying tables and returning results as JSON -/// -/// This client is used primarily for data exploration (e.g., the peek command). -use std::num::TryFromIntError; -use std::str::FromStr; -use std::time::Duration; - -use chrono::{DateTime, Days, NaiveDate}; -use clickhouse_rs::errors::FromSqlError; -use clickhouse_rs::types::{ColumnType, Row}; -use clickhouse_rs::types::{FromSql, FromSqlResult, Options, ValueRef}; -use clickhouse_rs::ClientHandle; -use futures::stream::BoxStream; -use futures::StreamExt; -use itertools::{Either, Itertools}; -use log::{info, warn}; -use serde::Serialize; -use serde_json::{json, Map, Value}; - -use crate::framework::core::infrastructure::table::{EnumValue, OrderBy}; -use crate::infrastructure::olap::clickhouse::config::ClickHouseConfig; -use crate::infrastructure::olap::clickhouse::model::{ - wrap_and_join_column_names, ClickHouseColumnType, ClickHouseTable, -}; - -/// Creates a ClickHouse connection pool with the provided configuration. 
-/// -/// # Arguments -/// * `click_house_config` - ClickHouse configuration -/// -/// # Returns -/// * `clickhouse_rs::Pool` - Connection pool for ClickHouse -pub fn get_pool(click_house_config: &ClickHouseConfig) -> clickhouse_rs::Pool { - let address = format!( - "tcp://{}:{}", - click_house_config.host, click_house_config.native_port - ); - - if click_house_config.use_ssl && click_house_config.native_port == 9000 { - warn!( - "The default secure native port is 9440 instead of 9000. You may get a timeout error." - ) - } - - clickhouse_rs::Pool::new( - Options::from_str(&address) - .unwrap() - .secure(click_house_config.use_ssl) - .connection_timeout(Duration::from_secs(20)) - .database(&click_house_config.db_name) - .username(&click_house_config.user) - .password(&click_house_config.password), - ) -} - -/// Wrapper for ValueRef to implement FromSql trait. -struct ValueRefWrapper<'a>(ValueRef<'a>); -impl<'a> FromSql<'a> for ValueRefWrapper<'a> { - fn from_sql(value: ValueRef<'a>) -> FromSqlResult> { - Ok(ValueRefWrapper(value)) - } -} - -/// Converts a ClickHouse ValueRef to a JSON Value. -/// -/// This function handles all ClickHouse data types and converts them to appropriate -/// JSON representations. It also handles enum mappings for string enums. -/// -/// # Arguments -/// * `value_ref` - ClickHouse value reference -/// * `enum_mapping` - Optional mapping for enum values -/// -/// # Returns -/// * `Result` - JSON value or error -fn value_to_json( - value_ref: &ValueRef, - enum_mapping: &Option>, -) -> Result { - let result = match value_ref { - ValueRef::Bool(v) => json!(v), - ValueRef::UInt8(v) => json!(v), - ValueRef::UInt16(v) => json!(v), - ValueRef::UInt32(v) => json!(v), - ValueRef::UInt64(v) => json!(v), - ValueRef::Int8(v) => json!(v), - ValueRef::Int16(v) => json!(v), - ValueRef::Int32(v) => json!(v), - ValueRef::Int64(v) => json!(v), - // TODO: base64 encode if type is Bytes (probably Uint8Array in TS) - // In clickhouse the String type means arbitrary bytes - ValueRef::String(v) => json!(String::from_utf8_lossy(v)), - ValueRef::Float32(v) => json!(v), - ValueRef::Float64(v) => json!(v), - ValueRef::Date(v) => { - let unix_epoch = NaiveDate::from_ymd_opt(1970, 1, 1).unwrap(); - let naive_date = unix_epoch.checked_add_days(Days::new((*v).into())).ok_or( - clickhouse_rs::errors::Error::FromSql(FromSqlError::OutOfRange), - )?; - json!(naive_date.to_string()) - } - - // in the following two cases the timezones are dropped - ValueRef::DateTime(t, _tz) => { - json!(DateTime::from_timestamp((*t).into(), 0) - .ok_or(clickhouse_rs::errors::Error::FromSql( - FromSqlError::OutOfRange - ))? - .to_rfc3339()) - } - ValueRef::DateTime64(value, (precision, _tz)) => { - // See to_datetime_opt in clickhouse-rs - let base10: i64 = 10; - - let nano = if *precision < 19 { - value * base10.pow(9 - precision) - } else { - 0_i64 - }; - - let sec = nano / 1_000_000_000; - let nsec: u32 = (nano - sec * 1_000_000_000).try_into().unwrap(); // always in range - - json!(DateTime::from_timestamp(sec, nsec).ok_or( - clickhouse_rs::errors::Error::FromSql(FromSqlError::OutOfRange) - )?) 
- } - - ValueRef::Nullable(Either::Left(_)) => Value::Null, - ValueRef::Nullable(Either::Right(v)) => value_to_json(v.as_ref(), enum_mapping)?, - ValueRef::Array(_t, values) => json!(values - .iter() - .map(|v| value_to_json(v, enum_mapping)) - .collect::, clickhouse_rs::errors::Error>>()?), - ValueRef::Decimal(d) => json!(f64::from(d.clone())), // consider using arbitrary_precision in serde_json - ValueRef::Uuid(_) => json!(value_ref.to_string()), - ValueRef::Enum16(_mapping, i) => convert_enum(i.internal(), enum_mapping), - ValueRef::Enum8(_mapping, i) => convert_enum(i.internal(), enum_mapping), - ValueRef::Ipv4(ip) => { - let ip_str = format!("{}.{}.{}.{}", ip[0], ip[1], ip[2], ip[3]); - json!(ip_str) - } - ValueRef::Ipv6(ip) => { - json!(ip - .chunks(2) - .map(|chunk| { format!("{:02x}{:02x}", chunk[0], chunk[1]) }) - .join(":")) - } - ValueRef::Map(_, _, m) => Value::Object( - m.iter() - .map(|(k, v)| { - Ok::<_, clickhouse_rs::errors::Error>(( - k.to_string(), - value_to_json(v, enum_mapping)?, - )) - }) - .collect::>()?, - ), - }; - Ok(result) -} - -/// Converts an enum value to a JSON value. -/// -/// This function handles both integer and string enums. For string enums, -/// it uses the provided mapping to convert the integer value to a string. -/// -/// # Arguments -/// * `i` - Enum integer value -/// * `enum_mapping` - Optional mapping for enum values -/// -/// # Returns -/// * `Value` - JSON value for the enum -fn convert_enum(i: I, enum_mapping: &Option>) -> Value -where - I: Serialize, - usize: TryFrom, -{ - match enum_mapping { - None => json!(i), - // unwrap is safe because of the invariant - - // enum_mapping is Some only when the TS enum has string values - Some(values) => json!(values[usize::try_from(i).unwrap() - 1]), - } -} - -/// Converts a ClickHouse row to a JSON object. -/// -/// This function converts each column in the row to a JSON value and -/// combines them into a JSON object. -/// -/// # Arguments -/// * `row` - ClickHouse row -/// * `enum_mappings` - Enum mappings for each column -/// -/// # Returns -/// * `Result` - JSON object or error -fn row_to_json( - row: &Row<'_, C>, - enum_mappings: &[Option>], -) -> Result -where - C: ColumnType, -{ - // can we use visitors to construct the JSON string directly, - // without constructing the Value::Object first - let mut result = Map::with_capacity(row.len()); - - for (i, enum_mapping) in enum_mappings.iter().enumerate() { - let value = value_to_json(&row.get::(i).unwrap().0, enum_mapping); - result.insert(row.name(i)?.into(), value?); - } - Ok(Value::Object(result)) -} - -/// Converts a ClickHouse column type to an enum mapping. -/// -/// This function extracts the enum mapping from a ClickHouse column type -/// if it's an enum with string values. -/// -/// # Arguments -/// * `t` - ClickHouse column type -/// -/// # Returns -/// * `Option>` - Enum mapping or None -fn column_type_to_enum_mapping(t: &ClickHouseColumnType) -> Option> { - match t { - ClickHouseColumnType::String - | ClickHouseColumnType::FixedString(_) - | ClickHouseColumnType::Boolean - | ClickHouseColumnType::ClickhouseInt(_) - | ClickHouseColumnType::ClickhouseFloat(_) - | ClickHouseColumnType::Decimal { .. } - | ClickHouseColumnType::DateTime - | ClickHouseColumnType::Date32 - | ClickHouseColumnType::Date - | ClickHouseColumnType::Map(_, _) - | ClickHouseColumnType::DateTime64 { .. 
} - | ClickHouseColumnType::IpV4 - | ClickHouseColumnType::IpV6 - | ClickHouseColumnType::Json(_) - | ClickHouseColumnType::Uuid - | ClickHouseColumnType::AggregateFunction { .. } - | ClickHouseColumnType::SimpleAggregateFunction { .. } - | ClickHouseColumnType::Bytes => None, - ClickHouseColumnType::Array(t) => column_type_to_enum_mapping(t.as_ref()), - ClickHouseColumnType::NamedTuple(_) | ClickHouseColumnType::Nested(_) => { - // Not entire sure I understand what this method does... do we just ignore the nested type? - todo!("Implement the nested type mapper") - } - // Geometry types have no enum mapping - ClickHouseColumnType::Point - | ClickHouseColumnType::Ring - | ClickHouseColumnType::LineString - | ClickHouseColumnType::MultiLineString - | ClickHouseColumnType::Polygon - | ClickHouseColumnType::MultiPolygon => None, - ClickHouseColumnType::Enum(values) => values.values.first().and_then(|m| match m.value { - EnumValue::Int(_) => None, - EnumValue::String(_) => Some( - values - .values - .iter() - .map(|member| match &member.value { - EnumValue::Int(_) => panic!("Mixed enum values."), - EnumValue::String(s) => s.as_str(), - }) - .collect::>(), - ), - }), - ClickHouseColumnType::Nullable(inner) => column_type_to_enum_mapping(inner), - ClickHouseColumnType::LowCardinality(inner) => column_type_to_enum_mapping(inner), - } -} - -/// Executes a SELECT query and returns the results as a stream of JSON objects. -/// -/// # Arguments -/// * `db_name` - Database name -/// * `table` - Table to query -/// * `client` - ClickHouse client -/// * `limit_offset_clause` - LIMIT/OFFSET clause for the query -/// -/// # Returns -/// * `Result>, clickhouse_rs::errors::Error>` - Stream of JSON objects or error -async fn select_as_json<'a>( - db_name: &str, - table: &'a ClickHouseTable, - client: &'a mut ClientHandle, - limit_offset_clause: &str, -) -> Result>, clickhouse_rs::errors::Error> -{ - let enum_mapping: Vec>> = table - .columns - .iter() - .map(|c| column_type_to_enum_mapping(&c.column_type)) - .collect(); - - let order_by = match &table.order_by { - OrderBy::Fields(v) if !v.is_empty() => { - format!("ORDER BY {}", wrap_and_join_column_names(v, ", ")) - } - OrderBy::SingleExpr(expr) => format!("ORDER BY {expr}"), - _ => { - // Fall back to primary key columns only if no explicit order_by is specified - let key_columns: Vec = table - .primary_key_columns() - .iter() - .map(|s| s.to_string()) - .collect(); - - if key_columns.is_empty() { - "".to_string() - } else { - format!( - "ORDER BY {}", - wrap_and_join_column_names(&key_columns, ", ") - ) - } - } - }; - - let query = &format!( - "select * from \"{}\".\"{}\" {} {}", - db_name, table.name, order_by, limit_offset_clause - ); - info!("select_as_json query: {}", query); - let stream = client - .query(query) - .stream() - .map(move |row| row_to_json(&row?, &enum_mapping)); - info!("select_as_json got data load stream."); - Ok(Box::pin(stream)) -} - -/// Executes a SELECT query with a LIMIT clause and returns the results as a stream of JSON objects. 
-/// -/// # Arguments -/// * `db_name` - Database name -/// * `table` - Table to query -/// * `client` - ClickHouse client -/// * `limit` - Limit for the query -/// -/// # Returns -/// * `Result>, clickhouse_rs::errors::Error>` - Stream of JSON objects or error -pub async fn select_some_as_json<'a>( - db_name: &str, - table: &'a ClickHouseTable, - client: &'a mut ClientHandle, - limit: i64, -) -> Result>, clickhouse_rs::errors::Error> -{ - select_as_json(db_name, table, client, &format!("limit {limit}")).await -} diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse_http_client.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse_http_client.rs new file mode 100644 index 0000000000..c62a09aea6 --- /dev/null +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse_http_client.rs @@ -0,0 +1,108 @@ +//! HTTP-based ClickHouse client for query operations +//! +//! This module provides query functionality using the HTTP-based `clickhouse` crate. +//! Unlike the native protocol client (clickhouse-rs), this client: +//! - Supports all ClickHouse types including LowCardinality +//! - Uses JSON format for data serialization +//! - Is actively maintained +//! - Aligns with how consumption APIs access ClickHouse + +use crate::infrastructure::olap::clickhouse::config::ClickHouseConfig; +use crate::infrastructure::olap::clickhouse::{create_client, ConfiguredDBClient}; +use serde_json::Value; +use tracing::debug; + +/// Create a configured HTTP client for query operations +/// +/// # Arguments +/// * `clickhouse_config` - ClickHouse configuration +/// +/// # Returns +/// * `ConfiguredDBClient` - Configured client ready for queries +pub fn create_query_client(clickhouse_config: &ClickHouseConfig) -> ConfiguredDBClient { + create_client(clickhouse_config.clone()) +} + +/// Execute a SELECT query and return results as JSON +/// +/// # Arguments +/// * `client` - Configured ClickHouse client +/// * `query` - SQL query string +/// +/// # Returns +/// * Vec of JSON objects (one per row) +/// +/// # Implementation Note +/// Uses direct HTTP request to ClickHouse with JSONEachRow format since the +/// clickhouse crate doesn't natively support serde_json::Value deserialization. 
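+///
+/// # Example
+///
+/// Illustrative usage (mirrors the ignored integration test below); assumes a
+/// `ClickHouseConfig` named `clickhouse_config` is in scope and a ClickHouse
+/// instance is reachable:
+///
+/// ```ignore
+/// let client = create_query_client(&clickhouse_config);
+/// let rows = query_as_json_stream(&client, "SELECT 1 AS num, 'test' AS text").await?;
+/// assert_eq!(rows[0]["num"], 1);
+/// assert_eq!(rows[0]["text"], "test");
+/// ```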
+pub async fn query_as_json_stream( + client: &ConfiguredDBClient, + query: &str, +) -> Result, Box> { + debug!("Executing HTTP query: {}", query); + + let config = &client.config; + let protocol = if config.use_ssl { "https" } else { "http" }; + let url = format!("{}://{}:{}", protocol, config.host, config.host_port); + + // Use reqwest to make a raw HTTP request with JSONEachRow format + let http_client = reqwest::Client::new(); + let response = http_client + .post(&url) + .query(&[("database", &config.db_name)]) + .query(&[("default_format", "JSONEachRow")]) + .basic_auth(&config.user, Some(&config.password)) + .body(query.to_string()) + .send() + .await?; + + if !response.status().is_success() { + let status = response.status(); + let error_text = response.text().await.unwrap_or_default(); + return Err(format!("ClickHouse query failed ({}): {}", status, error_text).into()); + } + + let text = response.text().await?; + + // Parse each line as a separate JSON object (JSONEachRow format) + let mut results = Vec::new(); + for line in text.lines() { + if !line.trim().is_empty() { + let value: Value = serde_json::from_str(line)?; + results.push(value); + } + } + + Ok(results) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[tokio::test] + #[ignore] // Requires running ClickHouse instance + async fn test_query_as_json_stream() { + let config = ClickHouseConfig { + db_name: "default".to_string(), + host: "localhost".to_string(), + host_port: 8123, + native_port: 9000, + user: "default".to_string(), + password: "".to_string(), + use_ssl: false, + host_data_path: None, + additional_databases: vec![], + clusters: None, + }; + + let client = create_query_client(&config); + let rows = query_as_json_stream(&client, "SELECT 1 as num, 'test' as text") + .await + .expect("Query should succeed"); + + assert_eq!(rows.len(), 1); + assert_eq!(rows[0]["num"], 1); + assert_eq!(rows[0]["text"], "test"); + } +} diff --git a/apps/framework-cli/src/infrastructure/olap/ddl_ordering.rs b/apps/framework-cli/src/infrastructure/olap/ddl_ordering.rs index 493ec82196..aeefa8144d 100644 --- a/apps/framework-cli/src/infrastructure/olap/ddl_ordering.rs +++ b/apps/framework-cli/src/infrastructure/olap/ddl_ordering.rs @@ -181,6 +181,7 @@ impl AtomicOlapOperation { } => SerializableOlapOperation::DropTable { table: table.name.clone(), database: table.database.clone(), + cluster_name: table.cluster_name.clone(), }, AtomicOlapOperation::AddTableColumn { table, @@ -192,6 +193,7 @@ impl AtomicOlapOperation { column: column.clone(), after_column: after_column.clone(), database: table.database.clone(), + cluster_name: table.cluster_name.clone(), }, AtomicOlapOperation::DropTableColumn { table, @@ -201,6 +203,7 @@ impl AtomicOlapOperation { table: table.name.clone(), column_name: column_name.clone(), database: table.database.clone(), + cluster_name: table.cluster_name.clone(), }, AtomicOlapOperation::ModifyTableColumn { table, @@ -212,6 +215,7 @@ impl AtomicOlapOperation { before_column: before_column.clone(), after_column: after_column.clone(), database: table.database.clone(), + cluster_name: table.cluster_name.clone(), }, AtomicOlapOperation::ModifyTableSettings { table, @@ -223,6 +227,7 @@ impl AtomicOlapOperation { before_settings: before_settings.clone(), after_settings: after_settings.clone(), database: table.database.clone(), + cluster_name: table.cluster_name.clone(), }, AtomicOlapOperation::ModifyTableTtl { table, @@ -234,12 +239,14 @@ impl AtomicOlapOperation { before: before.clone(), after: after.clone(), database: 
table.database.clone(), + cluster_name: table.cluster_name.clone(), }, AtomicOlapOperation::AddTableIndex { table, index, .. } => { SerializableOlapOperation::AddTableIndex { table: table.name.clone(), index: index.clone(), database: table.database.clone(), + cluster_name: table.cluster_name.clone(), } } AtomicOlapOperation::DropTableIndex { @@ -248,6 +255,7 @@ impl AtomicOlapOperation { table: table.name.clone(), index_name: index_name.clone(), database: table.database.clone(), + cluster_name: table.cluster_name.clone(), }, AtomicOlapOperation::ModifySampleBy { table, expression, .. @@ -255,11 +263,13 @@ impl AtomicOlapOperation { table: table.name.clone(), expression: expression.clone(), database: table.database.clone(), + cluster_name: table.cluster_name.clone(), }, AtomicOlapOperation::RemoveSampleBy { table, .. } => { SerializableOlapOperation::RemoveSampleBy { table: table.name.clone(), database: table.database.clone(), + cluster_name: table.cluster_name.clone(), } } AtomicOlapOperation::PopulateMaterializedView { @@ -1228,7 +1238,7 @@ fn order_operations_by_dependencies( // Check if adding this edge created a cycle if petgraph::algo::is_cyclic_directed(&graph) { - log::debug!("Cycle detected while adding edge"); + tracing::debug!("Cycle detected while adding edge"); return Err(PlanOrderingError::CyclicDependency); } } @@ -1237,14 +1247,14 @@ fn order_operations_by_dependencies( // Also check for cycles after all edges are added if petgraph::algo::is_cyclic_directed(&graph) { - log::debug!("Cycle detected after adding all edges"); + tracing::debug!("Cycle detected after adding all edges"); return Err(PlanOrderingError::CyclicDependency); } // If no edges were added, just return operations in original order // This handles cases where signatures were invalid or not found if edge_count == 0 && operations.len() > 1 { - log::debug!("No edges were added to the graph"); + tracing::debug!("No edges were added to the graph"); return Ok(operations.to_vec()); } @@ -1252,7 +1262,7 @@ fn order_operations_by_dependencies( let sorted_indices = match toposort(&graph, None) { Ok(indices) => indices, Err(err) => { - log::debug!( + tracing::debug!( "Cycle detected during topological sort: {:?}", err.node_id() ); @@ -1298,7 +1308,7 @@ mod tests { order_by: OrderBy::Fields(vec![]), partition_by: None, sample_by: None, - engine: None, + engine: ClickhouseEngine::MergeTree, version: None, source_primitive: PrimitiveSignature { name: "test".to_string(), @@ -1311,6 +1321,8 @@ mod tests { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, + primary_key_expression: None, }; // Create some atomic operations @@ -1372,7 +1384,7 @@ mod tests { order_by: OrderBy::Fields(vec![]), partition_by: None, sample_by: None, - engine: None, + engine: ClickhouseEngine::MergeTree, version: None, source_primitive: PrimitiveSignature { name: "test".to_string(), @@ -1385,6 +1397,8 @@ mod tests { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, + primary_key_expression: None, }; // Create table B - depends on table A @@ -1394,7 +1408,7 @@ mod tests { order_by: OrderBy::Fields(vec![]), partition_by: None, sample_by: None, - engine: None, + engine: ClickhouseEngine::MergeTree, version: None, source_primitive: PrimitiveSignature { name: "test".to_string(), @@ -1407,6 +1421,8 @@ mod tests { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, + primary_key_expression: None, }; // Create view C - depends on table B @@ -1488,7 +1504,7 @@ mod tests { 
order_by: OrderBy::Fields(vec![]), partition_by: None, sample_by: None, - engine: None, + engine: ClickhouseEngine::MergeTree, version: None, source_primitive: PrimitiveSignature { name: "test".to_string(), @@ -1501,6 +1517,8 @@ mod tests { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, + primary_key_expression: None, }; // Create table B - target for materialized view @@ -1510,7 +1528,7 @@ mod tests { order_by: OrderBy::Fields(vec![]), partition_by: None, sample_by: None, - engine: None, + engine: ClickhouseEngine::MergeTree, version: None, source_primitive: PrimitiveSignature { name: "test".to_string(), @@ -1523,6 +1541,8 @@ mod tests { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, + primary_key_expression: None, }; // Create view C - depends on table B @@ -1624,7 +1644,7 @@ mod tests { order_by: OrderBy::Fields(vec![]), partition_by: None, sample_by: None, - engine: None, + engine: ClickhouseEngine::MergeTree, version: None, source_primitive: PrimitiveSignature { name: "test".to_string(), @@ -1637,6 +1657,8 @@ mod tests { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, + primary_key_expression: None, }; let view = View { @@ -1780,7 +1802,7 @@ mod tests { order_by: OrderBy::Fields(vec![]), partition_by: None, sample_by: None, - engine: None, + engine: ClickhouseEngine::MergeTree, version: None, source_primitive: PrimitiveSignature { name: "test".to_string(), @@ -1793,6 +1815,8 @@ mod tests { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, + primary_key_expression: None, }; let table_b = Table { @@ -1801,7 +1825,7 @@ mod tests { order_by: OrderBy::Fields(vec![]), partition_by: None, sample_by: None, - engine: None, + engine: ClickhouseEngine::MergeTree, version: None, source_primitive: PrimitiveSignature { name: "test".to_string(), @@ -1814,6 +1838,8 @@ mod tests { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, + primary_key_expression: None, }; let table_c = Table { @@ -1822,7 +1848,7 @@ mod tests { order_by: OrderBy::Fields(vec![]), partition_by: None, sample_by: None, - engine: None, + engine: ClickhouseEngine::MergeTree, version: None, source_primitive: PrimitiveSignature { name: "test".to_string(), @@ -1835,6 +1861,8 @@ mod tests { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, + primary_key_expression: None, }; // Test operations @@ -1912,7 +1940,7 @@ mod tests { order_by: OrderBy::Fields(vec![]), partition_by: None, sample_by: None, - engine: None, + engine: ClickhouseEngine::MergeTree, version: None, source_primitive: PrimitiveSignature { name: "test".to_string(), @@ -1925,6 +1953,8 @@ mod tests { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, + primary_key_expression: None, }; let table_b = Table { @@ -1933,7 +1963,7 @@ mod tests { order_by: OrderBy::Fields(vec![]), partition_by: None, sample_by: None, - engine: None, + engine: ClickhouseEngine::MergeTree, version: None, source_primitive: PrimitiveSignature { name: "test".to_string(), @@ -1946,6 +1976,8 @@ mod tests { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, + primary_key_expression: None, }; let table_c = Table { @@ -1954,7 +1986,7 @@ mod tests { order_by: OrderBy::Fields(vec![]), partition_by: None, sample_by: None, - engine: None, + engine: ClickhouseEngine::MergeTree, version: None, source_primitive: PrimitiveSignature { name: "test".to_string(), @@ -1967,6 
+1999,8 @@ mod tests { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, + primary_key_expression: None, }; let table_d = Table { @@ -1975,7 +2009,7 @@ mod tests { order_by: OrderBy::Fields(vec![]), partition_by: None, sample_by: None, - engine: None, + engine: ClickhouseEngine::MergeTree, version: None, source_primitive: PrimitiveSignature { name: "test".to_string(), @@ -1988,6 +2022,8 @@ mod tests { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, + primary_key_expression: None, }; let table_e = Table { @@ -1996,7 +2032,7 @@ mod tests { order_by: OrderBy::Fields(vec![]), partition_by: None, sample_by: None, - engine: None, + engine: ClickhouseEngine::MergeTree, version: None, source_primitive: PrimitiveSignature { name: "test".to_string(), @@ -2009,6 +2045,8 @@ mod tests { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, + primary_key_expression: None, }; let op_create_a = AtomicOlapOperation::CreateTable { @@ -2149,7 +2187,7 @@ mod tests { order_by: OrderBy::Fields(vec![]), partition_by: None, sample_by: None, - engine: None, + engine: ClickhouseEngine::MergeTree, version: None, source_primitive: PrimitiveSignature { name: "test".to_string(), @@ -2162,6 +2200,8 @@ mod tests { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, + primary_key_expression: None, }; // Create table B - target for materialized view @@ -2171,7 +2211,7 @@ mod tests { order_by: OrderBy::Fields(vec![]), partition_by: None, sample_by: None, - engine: None, + engine: ClickhouseEngine::MergeTree, version: None, source_primitive: PrimitiveSignature { name: "test".to_string(), @@ -2184,11 +2224,14 @@ mod tests { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, + primary_key_expression: None, }; // Create SQL resource for a materialized view let mv_sql_resource = SqlResource { name: "mv_a_to_b".to_string(), + database: None, setup: vec![ "CREATE MATERIALIZED VIEW mv_a_to_b TO table_b AS SELECT * FROM table_a" .to_string(), @@ -2292,7 +2335,7 @@ mod tests { order_by: OrderBy::Fields(vec![]), partition_by: None, sample_by: None, - engine: None, + engine: ClickhouseEngine::MergeTree, version: None, source_primitive: PrimitiveSignature { name: "test".to_string(), @@ -2305,6 +2348,8 @@ mod tests { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, + primary_key_expression: None, }; // Create table B - target for materialized view @@ -2314,7 +2359,7 @@ mod tests { order_by: OrderBy::Fields(vec![]), partition_by: None, sample_by: None, - engine: None, + engine: ClickhouseEngine::MergeTree, version: None, source_primitive: PrimitiveSignature { name: "test".to_string(), @@ -2327,11 +2372,14 @@ mod tests { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, + primary_key_expression: None, }; // Create SQL resource for a materialized view let mv_sql_resource = SqlResource { name: "mv_a_to_b".to_string(), + database: None, setup: vec![ "CREATE MATERIALIZED VIEW mv_a_to_b TO table_b AS SELECT * FROM table_a" .to_string(), @@ -2360,7 +2408,7 @@ mod tests { dependency_info: DependencyInfo { // For teardown: Table A depends on MV being gone first pulls_data_from: vec![InfrastructureSignature::SqlResource { - id: "mv_a_to_b".to_string(), + id: mv_sql_resource.name.clone(), }], pushes_data_to: vec![], }, @@ -2372,7 +2420,7 @@ mod tests { dependency_info: DependencyInfo { // For teardown: Table B depends on MV being gone first 
pulls_data_from: vec![InfrastructureSignature::SqlResource { - id: "mv_a_to_b".to_string(), + id: mv_sql_resource.name.clone(), }], pushes_data_to: vec![], }, @@ -2440,7 +2488,7 @@ mod tests { order_by: OrderBy::Fields(vec![]), partition_by: None, sample_by: None, - engine: None, + engine: ClickhouseEngine::MergeTree, version: None, source_primitive: PrimitiveSignature { name: "test".to_string(), @@ -2453,6 +2501,8 @@ mod tests { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, + primary_key_expression: None, }; let table_b = Table { @@ -2461,7 +2511,7 @@ mod tests { order_by: OrderBy::Fields(vec![]), partition_by: None, sample_by: None, - engine: None, + engine: ClickhouseEngine::MergeTree, version: None, source_primitive: PrimitiveSignature { name: "test".to_string(), @@ -2474,11 +2524,14 @@ mod tests { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, + primary_key_expression: None, }; // Create SQL resource for materialized view let resource = SqlResource { name: "mv_a_to_b".to_string(), + database: None, setup: vec![ "CREATE MATERIALIZED VIEW mv_a_to_b TO table_b AS SELECT * FROM table_a" .to_string(), @@ -2585,7 +2638,7 @@ mod tests { dependency_info: DependencyInfo { // For teardown: Table A depends on MV being gone first pulls_data_from: vec![InfrastructureSignature::SqlResource { - id: "mv_a_to_b".to_string(), + id: resource.name.clone(), }], pushes_data_to: vec![], }, @@ -2596,7 +2649,7 @@ mod tests { dependency_info: DependencyInfo { // For teardown: Table B depends on MV being gone first pulls_data_from: vec![InfrastructureSignature::SqlResource { - id: "mv_a_to_b".to_string(), + id: resource.name.clone(), }], pushes_data_to: vec![], }, @@ -2667,7 +2720,7 @@ mod tests { order_by: OrderBy::Fields(vec![]), partition_by: None, sample_by: None, - engine: None, + engine: ClickhouseEngine::MergeTree, version: None, source_primitive: PrimitiveSignature { name: "test".to_string(), @@ -2680,6 +2733,8 @@ mod tests { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, + primary_key_expression: None, }; // Create a column @@ -2776,7 +2831,7 @@ mod tests { order_by: OrderBy::Fields(vec![]), partition_by: None, sample_by: None, - engine: None, + engine: ClickhouseEngine::MergeTree, version: None, source_primitive: PrimitiveSignature { name: "test".to_string(), @@ -2789,6 +2844,8 @@ mod tests { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, + primary_key_expression: None, }; // Create operations with signatures that work with the current implementation @@ -2895,7 +2952,7 @@ mod tests { order_by: OrderBy::Fields(vec!["id".to_string()]), partition_by: None, sample_by: None, - engine: Some(ClickhouseEngine::MergeTree), + engine: ClickhouseEngine::MergeTree, version: None, source_primitive: PrimitiveSignature { name: "test".to_string(), @@ -2908,6 +2965,8 @@ mod tests { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, + primary_key_expression: None, }; let after_table = Table { @@ -2939,7 +2998,7 @@ mod tests { order_by: OrderBy::Fields(vec!["id".to_string()]), partition_by: None, sample_by: None, - engine: Some(ClickhouseEngine::MergeTree), + engine: ClickhouseEngine::MergeTree, version: None, source_primitive: PrimitiveSignature { name: "test".to_string(), @@ -2952,6 +3011,8 @@ mod tests { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, + primary_key_expression: None, }; // Create column changes (remove 
old_column, add new_column) diff --git a/apps/framework-cli/src/infrastructure/olap/mod.rs b/apps/framework-cli/src/infrastructure/olap/mod.rs index 9df6920774..fe056314c5 100644 --- a/apps/framework-cli/src/infrastructure/olap/mod.rs +++ b/apps/framework-cli/src/infrastructure/olap/mod.rs @@ -1,5 +1,6 @@ use clickhouse::ClickhouseChangesError; +use crate::framework::core::infrastructure::sql_resource::SqlResource; use crate::infrastructure::olap::clickhouse::TableWithUnsupportedType; use crate::{ framework::core::infrastructure::table::Table, framework::core::infrastructure_map::OlapChange, @@ -7,7 +8,7 @@ use crate::{ }; pub mod clickhouse; -pub mod clickhouse_alt_client; +pub mod clickhouse_http_client; pub mod ddl_ordering; #[derive(Debug, thiserror::Error)] @@ -52,6 +53,30 @@ pub trait OlapOperations { db_name: &str, project: &Project, ) -> Result<(Vec
<Table>, Vec<TableWithUnsupportedType>), OlapChangesError>; + + /// Retrieves all SQL resources (views and materialized views) from the database + /// + /// # Arguments + /// + /// * `db_name` - The name of the database to list SQL resources from + /// * `default_database` - The default database name for resolving unqualified table references + /// + /// # Returns + /// + /// * `Result<Vec<SqlResource>, OlapChangesError>` - A list of SqlResource objects + /// + /// # Errors + /// + /// Returns `OlapChangesError` if: + /// - The database connection fails + /// - The database doesn't exist + /// - The query execution fails + /// - SQL parsing fails + async fn list_sql_resources( + &self, + db_name: &str, + default_database: &str, + ) -> Result<Vec<SqlResource>, OlapChangesError>; } /// This method dispatches the execution of the changes to the right olap storage. diff --git a/apps/framework-cli/src/infrastructure/orchestration/temporal_client.rs b/apps/framework-cli/src/infrastructure/orchestration/temporal_client.rs index fc44d7db49..dae0523565 100644 --- a/apps/framework-cli/src/infrastructure/orchestration/temporal_client.rs +++ b/apps/framework-cli/src/infrastructure/orchestration/temporal_client.rs @@ -1,5 +1,4 @@ use anyhow::{Error, Result}; -use log::info; use temporal_sdk_core_protos::temporal::api::workflowservice::v1::workflow_service_client::WorkflowServiceClient; use temporal_sdk_core_protos::temporal::api::workflowservice::v1::{ DescribeNamespaceRequest, DescribeNamespaceResponse, DescribeWorkflowExecutionRequest, @@ -12,6 +11,7 @@ use temporal_sdk_core_protos::temporal::api::workflowservice::v1::{ }; use tonic::service::interceptor::InterceptedService; use tonic::transport::{Channel, Uri}; +use tracing::info; use crate::infrastructure::orchestration::temporal::{InvalidTemporalSchemeError, TemporalConfig}; use crate::project::Project; diff --git a/apps/framework-cli/src/infrastructure/processes/blocks_registry.rs b/apps/framework-cli/src/infrastructure/processes/blocks_registry.rs index bad89a00f6..ab6879a559 100644 --- a/apps/framework-cli/src/infrastructure/processes/blocks_registry.rs +++ b/apps/framework-cli/src/infrastructure/processes/blocks_registry.rs @@ -1,7 +1,7 @@ -use log::info; use std::sync::Arc; use std::{collections::HashMap, path::PathBuf}; use tokio::process::Child; +use tracing::info; use crate::project::Project; use crate::{ diff --git a/apps/framework-cli/src/infrastructure/processes/consumption_registry.rs b/apps/framework-cli/src/infrastructure/processes/consumption_registry.rs index df66237972..4271780e16 100644 --- a/apps/framework-cli/src/infrastructure/processes/consumption_registry.rs +++ b/apps/framework-cli/src/infrastructure/processes/consumption_registry.rs @@ -1,6 +1,6 @@ use std::path::PathBuf; -use log::info; +use tracing::info; use crate::utilities::system::{RestartingProcess, StartChildFn}; use crate::{ diff --git a/apps/framework-cli/src/infrastructure/processes/functions_registry.rs b/apps/framework-cli/src/infrastructure/processes/functions_registry.rs index 05148864fd..3f0f16597b 100644 --- a/apps/framework-cli/src/infrastructure/processes/functions_registry.rs +++ b/apps/framework-cli/src/infrastructure/processes/functions_registry.rs @@ -8,9 +8,9 @@ use crate::{ project::Project, utilities::system::KillProcessError, }; -use log::{error, info}; use std::collections::HashMap; use std::sync::Arc; +use tracing::{error, info}; #[derive(Debug, thiserror::Error)] pub enum FunctionRegistryError { diff --git a/apps/framework-cli/src/infrastructure/processes/kafka_clickhouse_sync.rs
b/apps/framework-cli/src/infrastructure/processes/kafka_clickhouse_sync.rs index bd7ccf8cca..115e63a59d 100644 --- a/apps/framework-cli/src/infrastructure/processes/kafka_clickhouse_sync.rs +++ b/apps/framework-cli/src/infrastructure/processes/kafka_clickhouse_sync.rs @@ -9,9 +9,6 @@ //! batching, back pressure, and error handling mechanisms. use futures::TryFutureExt; -use log::error; -use log::info; -use log::{debug, warn}; use rdkafka::consumer::{Consumer, StreamConsumer}; use rdkafka::producer::{DeliveryFuture, Producer}; use rdkafka::Message; @@ -19,6 +16,9 @@ use serde_json::Value; use std::collections::{HashMap, VecDeque}; use std::sync::{Arc, LazyLock}; use tokio::task::JoinHandle; +use tracing::error; +use tracing::info; +use tracing::{debug, warn}; use crate::framework::core::infrastructure::table::Column; use crate::framework::core::infrastructure::table::ColumnType; @@ -485,7 +485,7 @@ async fn sync_kafka_to_kafka( Ok(message) => match message.payload() { Some(payload) => match std::str::from_utf8(payload) { Ok(payload_str) => { - log::trace!( + tracing::trace!( "Received message from {}: {}", source_topic_name, payload_str @@ -631,7 +631,7 @@ async fn sync_kafka_to_clickhouse( let payload = if payload.len() >= 5 && payload[0] == 0x00 { &payload[5..] } else { payload }; match std::str::from_utf8(payload) { Ok(payload_str) => { - log::trace!( + tracing::trace!( "Received message from {}: {}", source_topic_name, payload_str ); @@ -699,17 +699,17 @@ fn mapper_json_to_clickhouse_record( let key = column.name.clone(); let value = map.get(&key); - log::trace!( + tracing::trace!( "Looking to map column {:?} to values in map: {:?}", column, map ); - log::trace!("Value found for key {}: {:?}", key, value); + tracing::trace!("Value found for key {}: {:?}", key, value); match value { Some(Value::Null) => { if column.required { - log::error!("Required column {} has a null value", key); + tracing::error!("Required column {} has a null value", key); } else { record.insert(key, ClickHouseValue::new_null()); } @@ -722,7 +722,7 @@ fn mapper_json_to_clickhouse_record( Err(e) => { // Promote mapping failures to `warn!` so we don't silently skip // individual records when their schema/value deviates. 
- log::warn!("For column {} with type {}, Error mapping JSON value to ClickHouse value: {}", column.name, &column.data_type, e) + tracing::warn!("For column {} with type {}, Error mapping JSON value to ClickHouse value: {}", column.name, &column.data_type, e) } }; } @@ -878,7 +878,7 @@ fn map_json_value_to_clickhouse_value( for value in arr.iter() { if *value == Value::Null { if !element_nullable { - log::error!("Array of non nullable elements has a null value"); + tracing::error!("Array of non nullable elements has a null value"); } // We are adding the value anyway to match the number of arguments that clickhouse expects array_values.push(ClickHouseValue::new_null()); @@ -925,7 +925,7 @@ fn map_json_value_to_clickhouse_value( match val { Some(Value::Null) => { if col.required { - log::error!("Required column {} has a null value", col_name); + tracing::error!("Required column {} has a null value", col_name); } // We are adding the value anyway to match the number of arguments that clickhouse expects values.push(ClickHouseValue::new_null()); diff --git a/apps/framework-cli/src/infrastructure/processes/mod.rs b/apps/framework-cli/src/infrastructure/processes/mod.rs index a7ddd5b3f8..f16fe598ec 100644 --- a/apps/framework-cli/src/infrastructure/processes/mod.rs +++ b/apps/framework-cli/src/infrastructure/processes/mod.rs @@ -57,7 +57,7 @@ pub async fn execute_changes( for change in changes.iter() { match change { ProcessChange::TopicToTableSyncProcess(Change::Added(sync)) => { - log::info!("Starting sync process: {:?}", sync.id()); + tracing::info!("Starting sync process: {:?}", sync.id()); let target_table_columns = std_columns_to_clickhouse_columns(&sync.columns)?; // Topic doesn't contain the namespace, so we need to build the full topic name @@ -76,11 +76,11 @@ pub async fn execute_changes( ); } ProcessChange::TopicToTableSyncProcess(Change::Removed(sync)) => { - log::info!("Stopping sync process: {:?}", sync.id()); + tracing::info!("Stopping sync process: {:?}", sync.id()); process_registry.syncing.stop_topic_to_table(&sync.id()) } ProcessChange::TopicToTableSyncProcess(Change::Updated { before, after }) => { - log::info!("Replacing Sync process: {:?} by {:?}", before, after); + tracing::info!("Replacing Sync process: {:?} by {:?}", before, after); // Topic doesn't contain the namespace, so we need to build the full topic name let after_source_topic = infra_map.get_topic(&after.source_topic_id)?; @@ -102,7 +102,7 @@ pub async fn execute_changes( ); } ProcessChange::TopicToTopicSyncProcess(Change::Added(sync)) => { - log::info!("Starting sync process: {:?}", sync.id()); + tracing::info!("Starting sync process: {:?}", sync.id()); // Topic doesn't contain the namespace, so we need to build the full topic name let source_topic = infra_map.get_topic(&sync.source_topic_id)?; @@ -118,7 +118,7 @@ pub async fn execute_changes( ); } ProcessChange::TopicToTopicSyncProcess(Change::Removed(sync)) => { - log::info!("Stopping sync process: {:?}", sync.id()); + tracing::info!("Stopping sync process: {:?}", sync.id()); // Topic doesn't contain the namespace, so we need to build the full topic name let target_topic = infra_map.get_topic(&sync.target_topic_id)?; @@ -130,7 +130,7 @@ pub async fn execute_changes( } // TopicToTopicSyncProcess Updated seems impossible ProcessChange::TopicToTopicSyncProcess(Change::Updated { before, after }) => { - log::info!("Replacing Sync process: {:?} by {:?}", before, after); + tracing::info!("Replacing Sync process: {:?} by {:?}", before, after); // Topic doesn't contain 
the namespace, so we need to build the full topic name let before_target_topic = infra_map.get_topic(&before.target_topic_id)?; @@ -155,17 +155,17 @@ pub async fn execute_changes( ); } ProcessChange::FunctionProcess(Change::Added(function_process)) => { - log::info!("Starting Function process: {:?}", function_process.id()); + tracing::info!("Starting Function process: {:?}", function_process.id()); process_registry .functions .start(infra_map, function_process)?; } ProcessChange::FunctionProcess(Change::Removed(function_process)) => { - log::info!("Stopping Function process: {:?}", function_process.id()); + tracing::info!("Stopping Function process: {:?}", function_process.id()); process_registry.functions.stop(function_process).await; } ProcessChange::FunctionProcess(Change::Updated { before, after }) => { - log::info!("Updating Function process: {:?}", before.id()); + tracing::info!("Updating Function process: {:?}", before.id()); process_registry.functions.stop(before).await; process_registry.functions.start(infra_map, after)?; } @@ -177,37 +177,37 @@ pub async fn execute_changes( after: _, }) => {} ProcessChange::ConsumptionApiWebServer(Change::Added(_)) => { - log::info!("Starting analytics api webserver process"); + tracing::info!("Starting analytics api webserver process"); process_registry.consumption.start()?; } ProcessChange::ConsumptionApiWebServer(Change::Removed(_)) => { - log::info!("Stopping analytics api webserver process"); + tracing::info!("Stopping analytics api webserver process"); process_registry.consumption.stop().await?; } ProcessChange::ConsumptionApiWebServer(Change::Updated { before: _, after: _, }) => { - log::info!("Re-Starting analytics api webserver process"); + tracing::info!("Re-Starting analytics api webserver process"); process_registry.consumption.stop().await?; process_registry.consumption.start()?; } ProcessChange::OrchestrationWorker(Change::Added(new_orchestration_worker)) => { - log::info!("Starting Orchestration worker process"); + tracing::info!("Starting Orchestration worker process"); process_registry .orchestration_workers .start(new_orchestration_worker) .await?; } ProcessChange::OrchestrationWorker(Change::Removed(old_orchestration_worker)) => { - log::info!("Stopping Orchestration worker process"); + tracing::info!("Stopping Orchestration worker process"); process_registry .orchestration_workers .stop(old_orchestration_worker) .await?; } ProcessChange::OrchestrationWorker(Change::Updated { before, after }) => { - log::info!("Restarting Orchestration worker process: {:?}", before.id()); + tracing::info!("Restarting Orchestration worker process: {:?}", before.id()); process_registry.orchestration_workers.stop(before).await?; process_registry.orchestration_workers.start(after).await?; } @@ -225,15 +225,15 @@ pub async fn execute_leader_changes( for change in changes.iter() { match (change, &mut process_registry.blocks) { (ProcessChange::OlapProcess(Change::Added(olap_process)), Some(blocks)) => { - log::info!("Starting Blocks process: {:?}", olap_process.id()); + tracing::info!("Starting Blocks process: {:?}", olap_process.id()); blocks.start(olap_process)?; } (ProcessChange::OlapProcess(Change::Removed(olap_process)), Some(blocks)) => { - log::info!("Stopping Blocks process: {:?}", olap_process.id()); + tracing::info!("Stopping Blocks process: {:?}", olap_process.id()); blocks.stop(olap_process).await?; } (ProcessChange::OlapProcess(Change::Updated { before, after }), Some(blocks)) => { - log::info!("Updating Blocks process: {:?}", 
before.id()); + tracing::info!("Updating Blocks process: {:?}", before.id()); blocks.stop(before).await?; blocks.start(after)?; } diff --git a/apps/framework-cli/src/infrastructure/processes/orchestration_workers_registry.rs b/apps/framework-cli/src/infrastructure/processes/orchestration_workers_registry.rs index 5a7236934e..0ed3950959 100644 --- a/apps/framework-cli/src/infrastructure/processes/orchestration_workers_registry.rs +++ b/apps/framework-cli/src/infrastructure/processes/orchestration_workers_registry.rs @@ -1,5 +1,5 @@ -use log::info; use std::collections::HashMap; +use tracing::info; use crate::{ cli::settings::Settings, diff --git a/apps/framework-cli/src/infrastructure/redis/connection.rs b/apps/framework-cli/src/infrastructure/redis/connection.rs index f7c4dcc573..1cbde06ed7 100644 --- a/apps/framework-cli/src/infrastructure/redis/connection.rs +++ b/apps/framework-cli/src/infrastructure/redis/connection.rs @@ -98,7 +98,7 @@ impl ConnectionManagerWrapper { match time::timeout(Duration::from_secs(5), client.get_connection_manager()).await { Ok(Ok(conn)) => return Ok(conn), Ok(Err(e)) => { - log::warn!( + tracing::warn!( " Failed to create Redis connection (attempt {}/{}): {}", attempts + 1, max_attempts, @@ -107,7 +107,7 @@ impl ConnectionManagerWrapper { last_error = Some(e); } Err(_) => { - log::warn!( + tracing::warn!( " Timeout creating Redis connection (attempt {}/{})", attempts + 1, max_attempts @@ -186,12 +186,12 @@ impl ConnectionManagerWrapper { match timeout_future.await { Ok(Ok(_response)) => true, Ok(Err(e)) => { - log::warn!(" Redis ping failed: {:?}", e); + tracing::warn!(" Redis ping failed: {:?}", e); self.state.store(false, Ordering::SeqCst); false } Err(e) => { - log::warn!(" Redis ping timed out: {:?}", e); + tracing::warn!(" Redis ping timed out: {:?}", e); self.state.store(false, Ordering::SeqCst); false } @@ -217,7 +217,7 @@ impl ConnectionManagerWrapper { pub async fn attempt_reconnection(&mut self, config: &RedisConfig) { let mut backoff = 5; while !self.state.load(Ordering::SeqCst) { - log::info!( + tracing::info!( " Attempting to reconnect to Redis at {} (backoff: {}s)", config.effective_url(), backoff @@ -239,10 +239,10 @@ impl ConnectionManagerWrapper { self.pub_sub = new_pubsub; // Store the new client for future connection creation self.client = Arc::new(client); - log::info!(" Successfully reconnected both Redis connections"); + tracing::info!(" Successfully reconnected both Redis connections"); } Err(e) => { - log::warn!(" Reconnected main connection but failed to reconnect pub_sub: {}", e); + tracing::warn!(" Reconnected main connection but failed to reconnect pub_sub: {}", e); // Still mark as reconnected since the main connection succeeded } } @@ -251,13 +251,16 @@ impl ConnectionManagerWrapper { break; } Err(err) => { - log::warn!(" Failed to reconnect to Redis: {}", err); + tracing::warn!( + " Failed to reconnect to Redis: {}", + err + ); backoff = std::cmp::min(backoff * 2, 60); } } } Err(err) => { - log::warn!( + tracing::warn!( " Failed to create Redis client for reconnection: {}", err ); @@ -272,7 +275,7 @@ impl ConnectionManagerWrapper { /// This method should be called as part of the application shutdown sequence /// to ensure Redis connections are properly terminated. 
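// [Editorial aside] The recurring change across these infrastructure files is a
// one-for-one swap of `log::` macros for their `tracing::` equivalents; the macro
// invocation syntax is unchanged at every call site. A minimal sketch of the pattern,
// assuming the `tracing` and `tracing-subscriber` crates (the subscriber setup below
// is an illustrative assumption, not part of this diff):
use tracing::{info, warn};

fn init_logging_sketch() {
    // Install a basic formatting subscriber so the tracing macros emit output.
    tracing_subscriber::fmt::init();
}

fn reconnect_attempt_sketch(attempt: u32, max_attempts: u32) {
    // Call sites migrate verbatim: `log::warn!(...)` becomes `tracing::warn!(...)`.
    warn!(
        "Failed to create Redis connection (attempt {}/{})",
        attempt, max_attempts
    );
    info!("Retrying with capped exponential backoff");
}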
pub async fn shutdown(&self) { - log::info!(" Shutting down Redis connections"); + tracing::info!(" Shutting down Redis connections"); // Send QUIT command to both connection managers let mut conn = self.connection.clone(); @@ -284,6 +287,6 @@ impl ConnectionManagerWrapper { // Mark the connection as disconnected self.state.store(false, Ordering::SeqCst); - log::info!(" Redis connections shutdown complete"); + tracing::info!(" Redis connections shutdown complete"); } } diff --git a/apps/framework-cli/src/infrastructure/redis/leadership.rs b/apps/framework-cli/src/infrastructure/redis/leadership.rs index 2671b59575..2abfb62b59 100644 --- a/apps/framework-cli/src/infrastructure/redis/leadership.rs +++ b/apps/framework-cli/src/infrastructure/redis/leadership.rs @@ -83,7 +83,7 @@ impl LeadershipManager { .await { Ok(2) => { - log::debug!( + tracing::debug!( " Lock acquired: {} by instance {}", lock_key, instance_id @@ -98,7 +98,7 @@ impl LeadershipManager { (false, false) // doesn't have lock and not new acquisition } Err(e) => { - log::error!(" Error acquiring lock {}: {}", lock_key, e); + tracing::error!(" Error acquiring lock {}: {}", lock_key, e); (false, false) // doesn't have lock and not new acquisition } } @@ -152,7 +152,7 @@ impl LeadershipManager { .await { Ok(1) => { - log::trace!( + tracing::trace!( " Lock renewed: {} for instance {}", lock_key, instance_id @@ -160,7 +160,7 @@ impl LeadershipManager { Ok(true) } Ok(0) => { - log::warn!( + tracing::warn!( " Cannot renew lock {} - not owned by instance {}", lock_key, instance_id @@ -168,7 +168,7 @@ impl LeadershipManager { Ok(false) } Ok(_) => { - log::warn!( + tracing::warn!( " Unexpected result while renewing lock {} for instance {}", lock_key, instance_id @@ -176,7 +176,7 @@ impl LeadershipManager { Ok(false) } Err(e) => { - log::error!(" Error renewing lock {}: {}", lock_key, e); + tracing::error!(" Error renewing lock {}: {}", lock_key, e); Err(anyhow::anyhow!("Error renewing lock: {}", e)) } } diff --git a/apps/framework-cli/src/infrastructure/redis/messaging.rs b/apps/framework-cli/src/infrastructure/redis/messaging.rs index 8b81a9e447..2eb5f0482f 100644 --- a/apps/framework-cli/src/infrastructure/redis/messaging.rs +++ b/apps/framework-cli/src/infrastructure/redis/messaging.rs @@ -76,11 +76,11 @@ impl MessagingManager { match conn.publish::<_, _, ()>(&channel, message).await { Ok(_) => { - log::debug!(" Message published to channel {}", channel); + tracing::debug!(" Message published to channel {}", channel); Ok(()) } Err(e) => { - log::error!( + tracing::error!( " Failed to publish message to channel {}: {}", channel, e diff --git a/apps/framework-cli/src/infrastructure/redis/mock.rs b/apps/framework-cli/src/infrastructure/redis/mock.rs index 90464d04f8..c5dfe4f51c 100644 --- a/apps/framework-cli/src/infrastructure/redis/mock.rs +++ b/apps/framework-cli/src/infrastructure/redis/mock.rs @@ -1,5 +1,4 @@ use anyhow::Result; -use log; use std::collections::HashMap; use std::sync::Arc; use tokio::sync::RwLock; @@ -140,13 +139,13 @@ impl MockRedisClient { let mut queues_map = self.queues.write().await; if !queues_map.contains_key(queue) { - log::debug!(" Creating new queue: {}", queue); + tracing::debug!(" Creating new queue: {}", queue); queues_map.insert(queue.to_string(), Vec::new()); } if let Some(queue_vec) = queues_map.get_mut(queue) { queue_vec.push(message.to_string()); - log::debug!( + tracing::debug!( " Added message to queue {}, length now: {}", queue, queue_vec.len() @@ -171,7 +170,7 @@ impl MockRedisClient { if let 
Some(queue_vec) = queues_map.get_mut(queue) { if !queue_vec.is_empty() { let message = queue_vec.remove(0); - log::debug!( + tracing::debug!( " Retrieved message from queue {}, length now: {}", queue, queue_vec.len() @@ -179,7 +178,7 @@ impl MockRedisClient { return Ok(Some(message)); } } else { - log::debug!(" Queue {} does not exist", queue); + tracing::debug!(" Queue {} does not exist", queue); } Ok(None) diff --git a/apps/framework-cli/src/infrastructure/redis/presence.rs b/apps/framework-cli/src/infrastructure/redis/presence.rs index cf02f4b3ce..4fd65218ca 100644 --- a/apps/framework-cli/src/infrastructure/redis/presence.rs +++ b/apps/framework-cli/src/infrastructure/redis/presence.rs @@ -54,14 +54,14 @@ impl PresenceManager { match conn.set_ex::<_, _, ()>(&key, now, 3).await { Ok(_) => { - log::debug!( + tracing::debug!( " Updated presence for instance {}", self.instance_id ); Ok(()) } Err(e) => { - log::error!( + tracing::error!( " Failed to update presence for instance {}: {}", self.instance_id, e diff --git a/apps/framework-cli/src/infrastructure/redis/redis_client.rs b/apps/framework-cli/src/infrastructure/redis/redis_client.rs index 5ffee8ca3a..c81a963aa8 100644 --- a/apps/framework-cli/src/infrastructure/redis/redis_client.rs +++ b/apps/framework-cli/src/infrastructure/redis/redis_client.rs @@ -431,7 +431,7 @@ impl RedisClient { ); let broadcast_channel = format!("{}::msgchannel", self.config.key_prefix); - log::info!( + tracing::info!( " Starting message listener on channels: {} and {}", instance_channel, broadcast_channel @@ -449,7 +449,7 @@ impl RedisClient { let mut pubsub = match client.get_async_pubsub().await { Ok(pubsub) => pubsub, Err(e) => { - log::error!(" Failed to get pubsub connection: {}", e); + tracing::error!(" Failed to get pubsub connection: {}", e); return; } }; @@ -458,7 +458,7 @@ impl RedisClient { .subscribe(&[&instance_channel_clone, &broadcast_channel_clone]) .await { - log::error!(" Failed to subscribe to channels: {}", e); + tracing::error!(" Failed to subscribe to channels: {}", e); return; } @@ -466,13 +466,13 @@ impl RedisClient { let msg = pubsub.on_message().next().await; if let Some(msg) = msg { if let Ok(payload) = msg.get_payload::() { - log::info!(" Received message: {}", payload); + tracing::info!(" Received message: {}", payload); let handlers = callbacks.read().await.clone(); for handler in handlers.iter() { handler(payload.clone()); } } else { - log::warn!(" Failed to decode message payload"); + tracing::warn!(" Failed to decode message payload"); } } else { tokio::time::sleep(tokio::time::Duration::from_secs(1)).await; @@ -683,11 +683,11 @@ impl RedisClient { { Ok(true) => { // Lock successfully renewed - log::debug!(" Lock '{}' renewed successfully", &lock_key); + tracing::debug!(" Lock '{}' renewed successfully", &lock_key); } Ok(false) => { // Lock not owned by this instance anymore - log::warn!( + tracing::warn!( " Failed to renew lock '{}': not owned by this instance", &lock_key ); @@ -695,7 +695,7 @@ impl RedisClient { } Err(e) => { // Error occurred while renewing - log::error!(" Error renewing lock '{}': {}", &lock_key, e); + tracing::error!(" Error renewing lock '{}': {}", &lock_key, e); // Continue trying to renew } } @@ -733,7 +733,7 @@ impl RedisClient { { Ok(_) => {} Err(e) => { - log::error!(" Failed to update presence: {}", e); + tracing::error!(" Failed to update presence: {}", e); } } } @@ -761,7 +761,7 @@ impl RedisClient { .update_presence(connection_manager.connection.clone()) .await { - log::error!(" Error updating 
presence: {}", e); + tracing::error!(" Error updating presence: {}", e); } } }); @@ -786,7 +786,7 @@ impl RedisClient { loop { tokio::time::sleep(Duration::from_secs(5)).await; if !connection_manager.ping().await { - log::warn!(" Redis connection lost, attempting reconnection"); + tracing::warn!(" Redis connection lost, attempting reconnection"); connection_manager.attempt_reconnection(&config).await; } } @@ -921,7 +921,7 @@ impl RedisClient { { Ok(_) => Ok(()), Err(e) => { - log::warn!( + tracing::warn!( " Failed to publish message (Redis may be unavailable): {}", e ); @@ -959,7 +959,7 @@ impl RedisClient { { Ok(_) => Ok(()), Err(e) => { - log::warn!( + tracing::warn!( " Failed to broadcast message (Redis may be unavailable): {}", e ); @@ -986,7 +986,7 @@ impl RedisClient { impl Drop for RedisClient { fn drop(&mut self) { - log::info!(" RedisClient is being dropped. Cleaning up tasks."); + tracing::info!(" RedisClient is being dropped. Cleaning up tasks."); // First, abort any running tasks to prevent them from using the connection if let Ok(mut guard) = self.listener_task.try_write() { diff --git a/apps/framework-cli/src/infrastructure/stream/kafka/client.rs b/apps/framework-cli/src/infrastructure/stream/kafka/client.rs index d24407b2ff..bb972dce47 100644 --- a/apps/framework-cli/src/infrastructure/stream/kafka/client.rs +++ b/apps/framework-cli/src/infrastructure/stream/kafka/client.rs @@ -13,7 +13,6 @@ use crate::infrastructure::stream::kafka::constants::{ DEFAULT_MAX_MESSAGE_BYTES, KAFKA_MAX_MESSAGE_BYTES_CONFIG_KEY, KAFKA_RETENTION_CONFIG_KEY, }; use crate::project::Project; -use log::{error, info, warn}; use rdkafka::admin::{AlterConfig, NewPartitions, ResourceSpecifier}; use rdkafka::config::RDKafkaLogLevel; use rdkafka::consumer::stream_consumer::StreamConsumer; @@ -28,6 +27,7 @@ use rdkafka::{ }; use std::collections::{HashMap, VecDeque}; use std::time::Duration; +use tracing::{error, info, warn}; use super::constants::{ DEFAULT_RETENTION_MS, KAFKA_ACKS_CONFIG_KEY, KAFKA_AUTO_COMMIT_INTERVAL_MS_CONFIG_KEY, diff --git a/apps/framework-cli/src/infrastructure/webapp.rs b/apps/framework-cli/src/infrastructure/webapp.rs index 47b98ba786..5ae64081fd 100644 --- a/apps/framework-cli/src/infrastructure/webapp.rs +++ b/apps/framework-cli/src/infrastructure/webapp.rs @@ -12,9 +12,9 @@ pub async fn execute_changes( web_app_changes: &[WebAppChange], webapp_changes_channel: Sender, ) -> Result<(), WebAppChangeError> { - log::info!("📤 Sending {} WebApp changes", web_app_changes.len()); + tracing::info!("📤 Sending {} WebApp changes", web_app_changes.len()); for webapp_change in web_app_changes.iter() { - log::info!("📤 Sending WebApp change: {:?}", webapp_change); + tracing::info!("📤 Sending WebApp change: {:?}", webapp_change); webapp_changes_channel.send(webapp_change.clone()).await?; } diff --git a/apps/framework-cli/src/main.rs b/apps/framework-cli/src/main.rs index 60cf9edeb0..66aada6f27 100644 --- a/apps/framework-cli/src/main.rs +++ b/apps/framework-cli/src/main.rs @@ -29,7 +29,7 @@ fn ensure_terminal_cleanup() { let _ = disable_raw_mode(); let _ = stdout.flush(); - log::info!("Terminal cleanup complete via crossterm"); + tracing::info!("Terminal cleanup complete via crossterm"); } // Entry point for the CLI application diff --git a/apps/framework-cli/src/mcp/server.rs b/apps/framework-cli/src/mcp/server.rs index 57f4e726ab..9932adc149 100644 --- a/apps/framework-cli/src/mcp/server.rs +++ b/apps/framework-cli/src/mcp/server.rs @@ -1,4 +1,3 @@ -use log::info; use rmcp::{ model::{ 
CallToolRequestParam, CallToolResult, ErrorCode, Implementation, ListResourcesResult, @@ -12,6 +11,7 @@ use rmcp::{ ErrorData, RoleServer, ServerHandler, }; use std::sync::Arc; +use tracing::info; use super::embedded_docs; use super::tools::{ diff --git a/apps/framework-cli/src/mcp/tools/infra_issues/mod.rs b/apps/framework-cli/src/mcp/tools/infra_issues/mod.rs index c7bf079cc6..b12823c5fb 100644 --- a/apps/framework-cli/src/mcp/tools/infra_issues/mod.rs +++ b/apps/framework-cli/src/mcp/tools/infra_issues/mod.rs @@ -7,157 +7,32 @@ //! Initial implementation focuses on ClickHouse diagnostics with extensible architecture //! for future infrastructure types. //! -//! ## ClickHouse Diagnostic Providers -//! -//! The tool automatically runs multiple diagnostic providers based on table engine types: -//! -//! ### 1. MutationDiagnostic -//! Detects stuck or failing mutations (ALTER operations) that can block table maintenance. -//! - **Source**: `system.mutations` -//! - **Detection**: Mutations not done (is_done = 0) or with non-empty failure reasons -//! - **Thresholds**: -//! - Error: Mutation has a failure reason (latest_fail_reason not empty) -//! - Warning: Mutation in progress but not completed (is_done = 0) -//! - **Suggested Action**: Cancel stuck mutations with KILL MUTATION -//! -//! ### 2. PartsDiagnostic -//! Identifies excessive data parts per partition that impact query performance. -//! - **Source**: `system.parts` -//! - **Detection**: Active parts count per partition > 100 -//! - **Thresholds**: -//! - Error: part_count > 300 -//! - Warning: 100 < part_count ≤ 300 -//! - **Suggested Action**: Run OPTIMIZE TABLE to merge parts -//! -//! ### 3. MergeDiagnostic -//! Monitors long-running background merges. -//! - **Source**: `system.merges` -//! - **Detection**: Merges running > 300 seconds -//! - **Thresholds**: -//! - Error: elapsed_time > 1800s (30 minutes) -//! - Warning: 300s < elapsed_time ≤ 1800s -//! - **Note**: Progress is tracked and reported but not used in severity determination -//! - **Suggested Action**: Monitor merge progress and check server resources (CPU, disk I/O, memory) -//! -//! ### 4. ErrorStatsDiagnostic -//! Aggregates errors from ClickHouse system.errors to surface recurring issues. -//! - **Source**: `system.errors` -//! - **Detection**: All errors with count > 0 (reports top 10 by occurrence) -//! - **Thresholds**: -//! - Error: error_count > 100 -//! - Warning: error_count > 10 -//! - Info: 0 < error_count ≤ 10 -//! - **Suggested Action**: Review error messages and recent system changes -//! -//! ### 5. S3QueueDiagnostic (S3Queue tables only) -//! Detects S3Queue ingestion failures and processing issues. -//! - **Source**: `system.s3queue_log` -//! - **Detection**: Failed or ProcessingFailed status entries in S3Queue log -//! - **Threshold**: All failed entries trigger Error severity -//! - **Suggested Action**: Check S3 credentials, permissions, and file formats -//! -//! ### 6. ReplicationDiagnostic (Replicated* tables only) -//! Monitors replication health, queue backlogs, and stuck replication entries. -//! - **Sources**: `system.replication_queue`, `system.replicas` -//! - **Detection**: -//! - Large queue backlogs (queue_size > 10 or > 100 for replicas health) -//! - Stuck entries (num_tries > 3 or has exceptions) -//! - Replica health issues (readonly, session_expired, high delay > 300s) -//! - **Thresholds**: -//! - Error: queue_size > 50, num_tries > 10, session_expired, delay > 600s -//! 
- Warning: queue_size > 10, 3 < num_tries ≤ 10, readonly, 300s < delay ≤ 600s -//! - **Suggested Action**: Check ZooKeeper connectivity, restart replication queues -//! -//! ### 7. MergeFailureDiagnostic -//! Detects system-wide background merge failures that may affect multiple tables. -//! - **Source**: `system.metrics` -//! - **Detection**: FailedBackgroundMerges metric > 0 -//! - **Thresholds**: -//! - Error: failed_merges > 10 -//! - Warning: failed_merges > 0 -//! - **Suggested Action**: Check system.errors for merge failure details, review disk space -//! -//! ### 8. StoppedOperationsDiagnostic -//! Identifies manually stopped or stalled merge/replication operations. -//! - **Sources**: `system.parts`, `system.merges`, `system.replicas` -//! - **Detection**: -//! - Many parts (>100) but no active merges -//! - Replica readonly with pending queue items -//! - **Thresholds**: -//! - Error: Replica readonly with queue items (replication stopped) -//! - Warning: Excessive parts with no merges (merges possibly stopped) -//! - **Suggested Action**: Run SYSTEM START MERGES or SYSTEM START REPLICATION QUEUES -//! -//! ## Query Timeout -//! All diagnostic queries have a 30-second timeout to prevent blocking on slow queries. -//! -//! ## Filtering Options -//! - **Component Filter**: Regex pattern to target specific tables/components -//! - **Severity Filter**: Filter by error, warning, or info (default: info shows all) -//! - **Time Filter**: Filter issues by time range (e.g., "-1h" for last hour) -//! -//! ## Output Format -//! Returns structured JSON with: -//! - \`severity\`: error, warning, or info -//! - \`source\`: System table(s) queried -//! - \`component\`: Affected table/component -//! - \`error_type\`: Category of issue -//! - \`message\`: Human-readable description -//! - \`details\`: Additional context (counts, values) -//! - \`suggested_action\`: Remediation steps -//! - \`related_queries\`: Diagnostic and fix queries - -// Diagnostic provider modules -mod errors; -mod merge_failures; -mod merges; -mod mutations; -mod parts; -mod replication; -mod s3queue; -mod stopped_operations; - -use log::{debug, info}; +//! See the shared `crate::infrastructure::olap::clickhouse::diagnostics` module for +//! detailed documentation on each diagnostic provider. 
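// [Editorial aside] A minimal sketch of how the relocated diagnostics are meant to be
// driven, based on the `diagnose_clickhouse` call site further down in this diff. The
// severity, the time window, and the error handling are illustrative assumptions.
use std::collections::HashMap;

use crate::framework::core::infrastructure::table::Table;
use crate::infrastructure::olap::clickhouse::config::ClickHouseConfig;
use crate::infrastructure::olap::clickhouse::diagnostics::{
    Component, DiagnosticOptions, DiagnosticRequest, Severity,
};

async fn diagnostics_sketch(
    table: &Table,
    clickhouse_config: &ClickHouseConfig,
) -> anyhow::Result<()> {
    // Describe the component to inspect; metadata carries the database name.
    let mut metadata = HashMap::new();
    metadata.insert("database".to_string(), clickhouse_config.db_name.clone());
    let component = Component {
        component_type: "table".to_string(),
        name: table.name.clone(),
        metadata,
    };

    let request = DiagnosticRequest {
        components: vec![(component, table.engine.clone())],
        options: DiagnosticOptions {
            diagnostic_names: Vec::new(), // empty = run every provider
            min_severity: Severity::Warning,
            since: Some("-1h".to_string()),
        },
    };

    // The shared entry point replaces the per-provider loop this module used to own.
    let output = crate::infrastructure::olap::clickhouse::diagnostics::run_diagnostics(
        request,
        clickhouse_config,
    )
    .await
    .map_err(|e| anyhow::anyhow!("{e}"))?;

    tracing::info!("diagnostics found {} issues", output.issues.len());
    Ok(())
}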
+ use regex::Regex; use rmcp::model::{CallToolResult, Tool}; -use serde::{Deserialize, Serialize}; use serde_json::{json, Map, Value}; use std::collections::HashMap; use std::sync::Arc; +use tracing::{debug, info}; use super::{create_error_result, create_success_result}; use crate::framework::core::infrastructure_map::InfrastructureMap; use crate::infrastructure::olap::clickhouse::config::ClickHouseConfig; -use crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine; +use crate::infrastructure::olap::clickhouse::diagnostics::{ + Component, DiagnosticOptions, DiagnosticOutput, DiagnosticRequest, InfrastructureType, Severity, +}; use crate::infrastructure::redis::redis_client::RedisClient; -// Re-export diagnostic providers -pub use errors::ErrorStatsDiagnostic; -pub use merge_failures::MergeFailureDiagnostic; -pub use merges::MergeDiagnostic; -pub use mutations::MutationDiagnostic; -pub use parts::PartsDiagnostic; -pub use replication::ReplicationDiagnostic; -pub use s3queue::S3QueueDiagnostic; -pub use stopped_operations::StoppedOperationsDiagnostic; - -/// Error types for infrastructure diagnostic operations +/// Error types for MCP infrastructure diagnostic operations #[derive(Debug, thiserror::Error)] pub enum DiagnoseError { #[error("Failed to load infrastructure map: {0}")] InfraMapLoad(#[from] anyhow::Error), - #[error("Failed to connect to ClickHouse: {0}")] - ClickHouseConnection(String), - - #[error("Failed to execute diagnostic query: {0}")] - QueryFailed(String), - - #[error("Query timeout after {0} seconds")] - QueryTimeout(u64), - - #[error("Failed to parse query result: {0}")] - ParseError(String), + #[error("Failed to execute diagnostics: {0}")] + DiagnosticFailed(String), #[error("Invalid parameter: {0}")] InvalidParameter(String), @@ -173,25 +48,6 @@ pub enum DiagnoseError { UnsupportedInfrastructureType(String), } -/// Infrastructure type enum -#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] -#[serde(rename_all = "lowercase")] -pub enum InfrastructureType { - ClickHouse, - // Future support: - // Kafka, - // Temporal, -} - -impl InfrastructureType { - fn from_str(s: &str) -> Result { - match s.to_lowercase().as_str() { - "clickhouse" => Ok(InfrastructureType::ClickHouse), - _ => Err(DiagnoseError::UnsupportedInfrastructureType(s.to_string())), - } - } -} - /// Component filter for targeting specific infrastructure components #[derive(Debug, Clone)] pub struct ComponentFilter { @@ -201,13 +57,17 @@ pub struct ComponentFilter { pub component_name: Option, } -/// Severity level for issues -#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] -#[serde(rename_all = "lowercase")] -pub enum Severity { - Error, - Warning, - Info, +/// Parameters for the diagnose_infrastructure MCP tool +#[derive(Debug)] +pub struct DiagnoseInfraParams { + /// Which infrastructure type to diagnose + pub infrastructure_type: InfrastructureType, + /// Optional filter for specific components + pub component_filter: Option, + /// Minimum severity level to report + pub severity: Severity, + /// Optional time filter (e.g., "-1h" for last hour) + pub since: Option, } impl Severity { @@ -223,126 +83,17 @@ impl Severity { ))), } } - - /// Check if this severity should include issues of the given level - fn includes(&self, other: &Severity) -> bool { - match self { - Severity::Info => true, // Info includes all severities - Severity::Warning => matches!(other, Severity::Warning | Severity::Error), - Severity::Error => matches!(other, Severity::Error), - } - } -} - -/// 
Parameters for the diagnose_infrastructure tool -#[derive(Debug)] -pub struct DiagnoseInfraParams { - /// Which infrastructure type to diagnose - pub infrastructure_type: InfrastructureType, - /// Optional filter for specific components - pub component_filter: Option, - /// Minimum severity level to report - pub severity: Severity, - /// Optional time filter (e.g., "-1h" for last hour) - pub since: Option, -} - -/// Component information for issue context -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct Component { - pub component_type: String, - pub name: String, - /// Flexible metadata for component-specific context (e.g., database, namespace, cluster) - #[serde(skip_serializing_if = "HashMap::is_empty")] - pub metadata: HashMap, } -/// Detailed information about an infrastructure issue -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct Issue { - pub severity: Severity, - pub source: String, - pub component: Component, - pub error_type: String, - pub message: String, - pub details: Map, - pub suggested_action: String, - pub related_queries: Vec, -} - -/// Summary statistics for diagnostic results -#[derive(Debug, Serialize, Deserialize)] -pub struct IssueSummary { - pub total_issues: usize, - pub by_severity: HashMap, - pub by_component: HashMap, -} - -/// Complete diagnostic output -#[derive(Debug, Serialize, Deserialize)] -pub struct DiagnosticOutput { - pub infrastructure_type: InfrastructureType, - pub issues: Vec, - pub summary: IssueSummary, -} - -impl DiagnosticOutput { - /// Create a new diagnostic output and compute summary statistics - pub fn new(infrastructure_type: InfrastructureType, issues: Vec) -> Self { - let mut by_severity = HashMap::new(); - let mut by_component = HashMap::new(); - - for issue in &issues { - let severity_key = format!("{:?}", issue.severity).to_lowercase(); - *by_severity.entry(severity_key).or_insert(0) += 1; - - let component_key = issue.component.name.clone(); - *by_component.entry(component_key).or_insert(0) += 1; - } - - let summary = IssueSummary { - total_issues: issues.len(), - by_severity, - by_component, - }; - - Self { - infrastructure_type, - issues, - summary, +impl InfrastructureType { + fn from_str(s: &str) -> Result { + match s.to_lowercase().as_str() { + "clickhouse" => Ok(InfrastructureType::ClickHouse), + _ => Err(DiagnoseError::UnsupportedInfrastructureType(s.to_string())), } } } -/// Trait for ClickHouse diagnostic providers -/// Each provider implements checks for a specific aspect of ClickHouse infrastructure health -/// -/// Note: Currently ClickHouse-specific. Will need refactoring to support other -/// infrastructure types (Kafka, Temporal, etc.) in the future. 
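// [Editorial aside] For context on the provider architecture being relocated: a
// minimal no-op implementation against the trait shape shown in the removed lines
// below. The provider name and the trivial check are illustrative assumptions; real
// providers query ClickHouse system tables and build `Issue` values.
struct NoopDiagnostic;

#[async_trait::async_trait]
impl DiagnosticProvider for NoopDiagnostic {
    fn name(&self) -> &str {
        "noop"
    }

    fn applicable_to(&self, _component: &Component, _engine: Option<&ClickhouseEngine>) -> bool {
        true // applies to every table; real providers filter on engine type
    }

    async fn diagnose(
        &self,
        _component: &Component,
        _engine: Option<&ClickhouseEngine>,
        _config: &ClickHouseConfig,
        _since: Option<&str>,
    ) -> Result<Vec<Issue>, DiagnoseError> {
        Ok(Vec::new()) // nothing to report; `is_system_wide` keeps its default of `false`
    }
}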
-#[async_trait::async_trait] -pub trait DiagnosticProvider: Send + Sync { - /// Name of this diagnostic provider - fn name(&self) -> &str; - - /// Check if this provider is applicable to the given component - fn applicable_to(&self, component: &Component, engine: Option<&ClickhouseEngine>) -> bool; - - /// Check if this provider is system-wide (not component-specific) - /// System-wide providers are run once, not per-component - fn is_system_wide(&self) -> bool { - false - } - - /// Run diagnostics and return list of issues found - async fn diagnose( - &self, - component: &Component, - engine: Option<&ClickhouseEngine>, - config: &ClickHouseConfig, - since: Option<&str>, - ) -> Result, DiagnoseError>; -} - /// Returns the tool definition for the MCP server pub fn tool_definition() -> Tool { let schema = json!({ @@ -387,7 +138,7 @@ pub fn tool_definition() -> Tool { Tool { name: "diagnose_infrastructure".into(), description: Some( - "Proactively diagnose infrastructure issues by intelligently checking relevant diagnostic sources based on infrastructure type. For ClickHouse, automatically checks: stuck mutations, S3Queue ingestion errors (for S3Queue tables), replication health (for replicated tables), data parts issues, background merge problems, system errors, and Docker container logs. Returns structured, actionable information about errors and warnings with suggested remediation steps.".into() + "Proactively diagnose infrastructure issues by intelligently checking relevant diagnostic sources based on infrastructure type. For ClickHouse, automatically checks: stuck mutations, S3Queue ingestion errors (for S3Queue tables), replication health (for replicated tables), data parts issues, background merge problems, system errors, and stopped operations. 
Returns structured, actionable information about errors and warnings with suggested remediation steps.".into() ), input_schema: Arc::new(schema.as_object().unwrap().clone()), annotations: None, @@ -463,7 +214,7 @@ fn parse_params( }) } -/// Handle the tool call with the given arguments +/// Handle the MCP tool call with the given arguments pub async fn handle_call( arguments: Option<&Map>, redis_client: Arc, @@ -504,7 +255,7 @@ async fn execute_diagnose_infrastructure( } } -/// Diagnose ClickHouse infrastructure +/// Diagnose ClickHouse infrastructure using the shared diagnostics module async fn diagnose_clickhouse( params: DiagnoseInfraParams, redis_client: Arc, @@ -547,124 +298,46 @@ async fn diagnose_clickhouse( debug!("Checking {} tables for issues", tables_to_check.len()); - // Create diagnostic providers - let providers = create_clickhouse_providers(); - - // Separate component-specific and system-wide providers - let component_providers: Vec<_> = providers.iter().filter(|p| !p.is_system_wide()).collect(); - let system_wide_providers: Vec<_> = providers.iter().filter(|p| p.is_system_wide()).collect(); - - // Run diagnostics for each table - let mut all_issues = Vec::new(); - - for (_map_key, table) in tables_to_check { - let mut metadata = HashMap::new(); - metadata.insert("database".to_string(), clickhouse_config.db_name.clone()); - - let component = Component { - component_type: "table".to_string(), - name: table.name.clone(), // Use the actual table name, not the infra map key - metadata, - }; + // Build DiagnosticRequest with components from infrastructure map + let components: Vec<_> = tables_to_check + .iter() + .map(|(_map_key, table)| { + let mut metadata = HashMap::new(); + metadata.insert("database".to_string(), clickhouse_config.db_name.clone()); - let engine = table.engine.as_ref(); - - // Run each applicable component-specific provider - for provider in &component_providers { - if provider.applicable_to(&component, engine) { - debug!( - "Running {} diagnostic for table {}", - provider.name(), - table.name - ); - - match provider - .diagnose( - &component, - engine, - clickhouse_config, - params.since.as_deref(), - ) - .await - { - Ok(mut issues) => { - // Filter by severity - issues.retain(|issue| params.severity.includes(&issue.severity)); - all_issues.extend(issues); - } - Err(e) => { - debug!( - "Provider {} failed for table {}: {}", - provider.name(), - table.name, - e - ); - // Continue with other providers even if one fails - } - } - } - } - } + let component = Component { + component_type: "table".to_string(), + name: table.name.clone(), // Use the actual table name + metadata, + }; - // Run system-wide diagnostics once - let mut system_metadata = HashMap::new(); - system_metadata.insert("database".to_string(), clickhouse_config.db_name.clone()); + (component, table.engine.clone()) + }) + .collect(); - let system_component = Component { - component_type: "system".to_string(), - name: "clickhouse".to_string(), - metadata: system_metadata, + let request = DiagnosticRequest { + components, + options: DiagnosticOptions { + diagnostic_names: Vec::new(), // Run all diagnostics + min_severity: params.severity, + since: params.since, + }, }; - for provider in system_wide_providers { - debug!("Running system-wide {} diagnostic", provider.name()); - - match provider - .diagnose( - &system_component, - None, - clickhouse_config, - params.since.as_deref(), - ) - .await - { - Ok(mut issues) => { - // Filter by severity - issues.retain(|issue| 
params.severity.includes(&issue.severity)); - all_issues.extend(issues); - } - Err(e) => { - debug!("System-wide provider {} failed: {}", provider.name(), e); - // Continue with other providers even if one fails - } - } - } - - // TODO: Add Docker logs diagnostic (not component-specific) + // Use the shared run_diagnostics function + let output = crate::infrastructure::olap::clickhouse::diagnostics::run_diagnostics( + request, + clickhouse_config, + ) + .await + .map_err(|e| DiagnoseError::DiagnosticFailed(format!("{}", e)))?; info!( "Infrastructure diagnostics complete. Found {} issues.", - all_issues.len() + output.issues.len() ); - Ok(DiagnosticOutput::new( - InfrastructureType::ClickHouse, - all_issues, - )) -} - -/// Create all ClickHouse diagnostic providers -fn create_clickhouse_providers() -> Vec> { - vec![ - Box::new(MutationDiagnostic), - Box::new(PartsDiagnostic), - Box::new(MergeDiagnostic), - Box::new(ErrorStatsDiagnostic), - Box::new(S3QueueDiagnostic), - Box::new(ReplicationDiagnostic), - Box::new(MergeFailureDiagnostic), - Box::new(StoppedOperationsDiagnostic), - ] + Ok(output) } #[cfg(test)] @@ -699,24 +372,20 @@ mod tests { #[test] fn test_severity_includes() { - let error = Severity::Error; - let warning = Severity::Warning; - let info = Severity::Info; - - // Error only includes error - assert!(error.includes(&Severity::Error)); - assert!(!error.includes(&Severity::Warning)); - assert!(!error.includes(&Severity::Info)); + // Info includes all + assert!(Severity::Info.includes(&Severity::Error)); + assert!(Severity::Info.includes(&Severity::Warning)); + assert!(Severity::Info.includes(&Severity::Info)); // Warning includes warning and error - assert!(warning.includes(&Severity::Error)); - assert!(warning.includes(&Severity::Warning)); - assert!(!warning.includes(&Severity::Info)); - - // Info includes everything - assert!(info.includes(&Severity::Error)); - assert!(info.includes(&Severity::Warning)); - assert!(info.includes(&Severity::Info)); + assert!(Severity::Warning.includes(&Severity::Error)); + assert!(Severity::Warning.includes(&Severity::Warning)); + assert!(!Severity::Warning.includes(&Severity::Info)); + + // Error includes only error + assert!(Severity::Error.includes(&Severity::Error)); + assert!(!Severity::Error.includes(&Severity::Warning)); + assert!(!Severity::Error.includes(&Severity::Info)); } #[test] @@ -724,18 +393,20 @@ mod tests { let args = json!({ "infrastructure_type": "clickhouse" }); - let map = args.as_object().unwrap(); - let result = parse_params(Some(map)); - assert!(result.is_ok()); - let params = result.unwrap(); - assert_eq!(params.infrastructure_type, InfrastructureType::ClickHouse); + + let params = parse_params(args.as_object()).unwrap(); + + assert!(matches!( + params.infrastructure_type, + InfrastructureType::ClickHouse + )); assert!(params.component_filter.is_none()); - assert_eq!(params.severity, Severity::Info); + assert!(matches!(params.severity, Severity::Info)); // Default assert!(params.since.is_none()); } #[test] - fn test_parse_params_with_filter() { + fn test_parse_params_full() { let args = json!({ "infrastructure_type": "clickhouse", "component_filter": { @@ -745,114 +416,149 @@ mod tests { "severity": "error", "since": "-1h" }); - let map = args.as_object().unwrap(); - let result = parse_params(Some(map)); - assert!(result.is_ok()); - let params = result.unwrap(); - assert!(params.component_filter.is_some()); + + let params = parse_params(args.as_object()).unwrap(); + + assert!(matches!( + params.infrastructure_type, + 
InfrastructureType::ClickHouse + )); + assert!(matches!(params.severity, Severity::Error)); + assert_eq!(params.since, Some("-1h".to_string())); + let filter = params.component_filter.unwrap(); assert_eq!(filter.component_type, Some("table".to_string())); assert!(filter.component_name.is_some()); - assert_eq!(params.severity, Severity::Error); - assert_eq!(params.since, Some("-1h".to_string())); + + let regex = filter.component_name.unwrap(); + assert!(regex.is_match("user_events")); + assert!(regex.is_match("user_profiles")); + assert!(!regex.is_match("events")); } #[test] - fn test_parse_params_invalid() { - // Missing required parameter - let args = json!({}); - let map = args.as_object().unwrap(); - assert!(parse_params(Some(map)).is_err()); - - // Invalid infrastructure type - let args = json!({"infrastructure_type": "invalid"}); - let map = args.as_object().unwrap(); - assert!(parse_params(Some(map)).is_err()); - - // Invalid severity + fn test_parse_params_component_filter_type_only() { let args = json!({ "infrastructure_type": "clickhouse", - "severity": "invalid" + "component_filter": { + "component_type": "view" + } }); - let map = args.as_object().unwrap(); - assert!(parse_params(Some(map)).is_err()); - // Invalid regex pattern + let params = parse_params(args.as_object()).unwrap(); + + let filter = params.component_filter.unwrap(); + assert_eq!(filter.component_type, Some("view".to_string())); + assert!(filter.component_name.is_none()); + } + + #[test] + fn test_parse_params_component_filter_name_only() { let args = json!({ "infrastructure_type": "clickhouse", "component_filter": { - "component_name": "[invalid" + "component_name": "events" } }); - let map = args.as_object().unwrap(); - assert!(parse_params(Some(map)).is_err()); + + let params = parse_params(args.as_object()).unwrap(); + + let filter = params.component_filter.unwrap(); + assert!(filter.component_type.is_none()); + assert!(filter.component_name.is_some()); } #[test] - fn test_diagnostic_output_summary() { - let issues = vec![ - Issue { - severity: Severity::Error, - source: "system.mutations".to_string(), - component: Component { - component_type: "table".to_string(), - name: "users".to_string(), - metadata: { - let mut m = HashMap::new(); - m.insert("database".to_string(), "test_db".to_string()); - m - }, - }, - error_type: "stuck_mutation".to_string(), - message: "Mutation stuck".to_string(), - details: Map::new(), - suggested_action: "Kill mutation".to_string(), - related_queries: vec![], - }, - Issue { - severity: Severity::Warning, - source: "system.parts".to_string(), - component: Component { - component_type: "table".to_string(), - name: "users".to_string(), - metadata: { - let mut m = HashMap::new(); - m.insert("database".to_string(), "test_db".to_string()); - m - }, - }, - error_type: "excessive_parts".to_string(), - message: "Too many parts".to_string(), - details: Map::new(), - suggested_action: "Optimize table".to_string(), - related_queries: vec![], - }, - Issue { - severity: Severity::Error, - source: "system.mutations".to_string(), - component: Component { - component_type: "table".to_string(), - name: "orders".to_string(), - metadata: { - let mut m = HashMap::new(); - m.insert("database".to_string(), "test_db".to_string()); - m - }, - }, - error_type: "failed_mutation".to_string(), - message: "Mutation failed".to_string(), - details: Map::new(), - suggested_action: "Check logs".to_string(), - related_queries: vec![], - }, - ]; + fn test_parse_params_invalid_regex() { + let args = json!({ + 
"infrastructure_type": "clickhouse", + "component_filter": { + "component_name": "[invalid(regex" + } + }); + + let result = parse_params(args.as_object()); + assert!(matches!(result, Err(DiagnoseError::InvalidRegex { .. }))); + + if let Err(DiagnoseError::InvalidRegex { pattern, .. }) = result { + assert_eq!(pattern, "[invalid(regex"); + } + } + + #[test] + fn test_parse_params_invalid_infrastructure_type() { + let args = json!({ + "infrastructure_type": "kafka" + }); - let output = DiagnosticOutput::new(InfrastructureType::ClickHouse, issues); + let result = parse_params(args.as_object()); + assert!(matches!( + result, + Err(DiagnoseError::UnsupportedInfrastructureType(_)) + )); + } + + #[test] + fn test_parse_params_invalid_severity() { + let args = json!({ + "infrastructure_type": "clickhouse", + "severity": "critical" + }); - assert_eq!(output.summary.total_issues, 3); - assert_eq!(output.summary.by_severity.get("error"), Some(&2)); - assert_eq!(output.summary.by_severity.get("warning"), Some(&1)); - assert_eq!(output.summary.by_component.get("users"), Some(&2)); - assert_eq!(output.summary.by_component.get("orders"), Some(&1)); + let result = parse_params(args.as_object()); + assert!(matches!(result, Err(DiagnoseError::InvalidParameter(_)))); + } + + #[test] + fn test_parse_params_missing_infrastructure_type() { + let args = json!({ + "severity": "error" + }); + + let result = parse_params(args.as_object()); + assert!(matches!(result, Err(DiagnoseError::InvalidParameter(_)))); + } + + #[test] + fn test_parse_params_no_arguments() { + let result = parse_params(None); + assert!(matches!(result, Err(DiagnoseError::InvalidParameter(_)))); + } + + #[test] + fn test_parse_params_all_severity_variants() { + for (severity_str, expected) in [ + ("error", Severity::Error), + ("warning", Severity::Warning), + ("info", Severity::Info), + ("all", Severity::Info), // "all" maps to Info + ] { + let args = json!({ + "infrastructure_type": "clickhouse", + "severity": severity_str + }); + + let params = parse_params(args.as_object()).unwrap(); + assert_eq!( + params.severity, expected, + "Failed for severity: {}", + severity_str + ); + } + } + + #[test] + fn test_parse_params_case_insensitive() { + let args = json!({ + "infrastructure_type": "CLICKHOUSE", + "severity": "ERROR" + }); + + let params = parse_params(args.as_object()).unwrap(); + assert!(matches!( + params.infrastructure_type, + InfrastructureType::ClickHouse + )); + assert!(matches!(params.severity, Severity::Error)); } } diff --git a/apps/framework-cli/src/mcp/tools/infra_issues/stopped_operations.rs b/apps/framework-cli/src/mcp/tools/infra_issues/stopped_operations.rs deleted file mode 100644 index 62b6cb7d69..0000000000 --- a/apps/framework-cli/src/mcp/tools/infra_issues/stopped_operations.rs +++ /dev/null @@ -1,210 +0,0 @@ -//! 
Diagnostic provider for checking stopped operations (merges, replication) - -use log::debug; -use serde_json::{json, Map, Value}; - -use super::{Component, DiagnoseError, DiagnosticProvider, Issue, Severity}; -use crate::infrastructure::olap::clickhouse::client::ClickHouseClient; -use crate::infrastructure::olap::clickhouse::config::ClickHouseConfig; -use crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine; - -/// Query timeout for diagnostic checks (30 seconds) -const DIAGNOSTIC_QUERY_TIMEOUT_SECS: u64 = 30; - -/// Diagnostic provider for checking stopped operations (merges, replication) -pub struct StoppedOperationsDiagnostic; - -#[async_trait::async_trait] -impl DiagnosticProvider for StoppedOperationsDiagnostic { - fn name(&self) -> &str { - "stopped_operations" - } - - fn applicable_to(&self, _component: &Component, _engine: Option<&ClickhouseEngine>) -> bool { - // Applicable to all tables - we check both merges and replication - true - } - - async fn diagnose( - &self, - component: &Component, - engine: Option<&ClickhouseEngine>, - config: &ClickHouseConfig, - _since: Option<&str>, - ) -> Result, DiagnoseError> { - let client = ClickHouseClient::new(config) - .map_err(|e| DiagnoseError::ClickHouseConnection(format!("{}", e)))?; - - let mut issues = Vec::new(); - - // Check if merges are stopped for this table - // We can detect this by checking if there are no running merges but many parts - let parts_count_query = format!( - "SELECT count() as part_count - FROM system.parts - WHERE database = '{}' AND table = '{}' AND active = 1 - FORMAT JSON", - config.db_name, component.name - ); - - debug!("Executing parts count query: {}", parts_count_query); - - let parts_result = tokio::time::timeout( - std::time::Duration::from_secs(DIAGNOSTIC_QUERY_TIMEOUT_SECS), - client.execute_sql(&parts_count_query), - ) - .await - .map_err(|_| DiagnoseError::QueryTimeout(DIAGNOSTIC_QUERY_TIMEOUT_SECS))? - .map_err(|e| DiagnoseError::QueryFailed(format!("{}", e)))?; - - let parts_json: Value = serde_json::from_str(&parts_result) - .map_err(|e| DiagnoseError::ParseError(format!("{}", e)))?; - - let parts_count = parts_json - .get("data") - .and_then(|v| v.as_array()) - .and_then(|arr| arr.first()) - .and_then(|row| row.get("part_count")) - .and_then(|v| v.as_u64()) - .unwrap_or(0); - - // If we have many parts, check if merges are running - if parts_count > 100 { - let merges_query = format!( - "SELECT count() as merge_count - FROM system.merges - WHERE database = '{}' AND table = '{}' - FORMAT JSON", - config.db_name, component.name - ); - - debug!("Executing merges query: {}", merges_query); - - let merges_result = tokio::time::timeout( - std::time::Duration::from_secs(DIAGNOSTIC_QUERY_TIMEOUT_SECS), - client.execute_sql(&merges_query), - ) - .await - .map_err(|_| DiagnoseError::QueryTimeout(DIAGNOSTIC_QUERY_TIMEOUT_SECS))? 
- .map_err(|e| DiagnoseError::QueryFailed(format!("{}", e)))?; - - let merges_json: Value = serde_json::from_str(&merges_result) - .map_err(|e| DiagnoseError::ParseError(format!("{}", e)))?; - - let merge_count = merges_json - .get("data") - .and_then(|v| v.as_array()) - .and_then(|arr| arr.first()) - .and_then(|row| row.get("merge_count")) - .and_then(|v| v.as_u64()) - .unwrap_or(0); - - // If we have excessive parts but no merges running, merges might be stopped - if merge_count == 0 { - let mut details = Map::new(); - details.insert("part_count".to_string(), json!(parts_count)); - details.insert("active_merges".to_string(), json!(0)); - - issues.push(Issue { - severity: Severity::Warning, - source: "system.parts,system.merges".to_string(), - component: component.clone(), - error_type: "merges_possibly_stopped".to_string(), - message: format!( - "Table has {} active parts but no running merges. Merges may be stopped or throttled.", - parts_count - ), - details, - suggested_action: format!( - "Check if merges were manually stopped with 'SELECT * FROM system.settings WHERE name LIKE \"%merge%\"'. Start merges if needed: 'SYSTEM START MERGES {}.{}'", - config.db_name, component.name - ), - related_queries: vec![ - format!( - "SELECT * FROM system.parts WHERE database = '{}' AND table = '{}' AND active = 1 ORDER BY modification_time DESC LIMIT 20", - config.db_name, component.name - ), - format!( - "SYSTEM START MERGES {}.{}", - config.db_name, component.name - ), - ], - }); - } - } - - // For replicated tables, check if replication queues are stopped - let is_replicated = matches!( - engine, - Some(ClickhouseEngine::ReplicatedMergeTree { .. }) - | Some(ClickhouseEngine::ReplicatedReplacingMergeTree { .. }) - | Some(ClickhouseEngine::ReplicatedAggregatingMergeTree { .. }) - | Some(ClickhouseEngine::ReplicatedSummingMergeTree { .. }) - ); - - if is_replicated { - let replicas_query = format!( - "SELECT is_readonly, queue_size - FROM system.replicas - WHERE database = '{}' AND table = '{}' - FORMAT JSON", - config.db_name, component.name - ); - - debug!("Executing replicas query: {}", replicas_query); - - let replicas_result = tokio::time::timeout( - std::time::Duration::from_secs(DIAGNOSTIC_QUERY_TIMEOUT_SECS), - client.execute_sql(&replicas_query), - ) - .await - .map_err(|_| DiagnoseError::QueryTimeout(DIAGNOSTIC_QUERY_TIMEOUT_SECS))? - .map_err(|e| DiagnoseError::QueryFailed(format!("{}", e)))?; - - let replicas_json: Value = serde_json::from_str(&replicas_result) - .map_err(|e| DiagnoseError::ParseError(format!("{}", e)))?; - - if let Some(replica_data) = replicas_json.get("data").and_then(|v| v.as_array()) { - for row in replica_data { - let is_readonly = row.get("is_readonly").and_then(|v| v.as_u64()).unwrap_or(0); - let queue_size = row.get("queue_size").and_then(|v| v.as_u64()).unwrap_or(0); - - // If replica is readonly with items in queue, replication might be stopped - if is_readonly == 1 && queue_size > 0 { - let mut details = Map::new(); - details.insert("is_readonly".to_string(), json!(true)); - details.insert("queue_size".to_string(), json!(queue_size)); - - issues.push(Issue { - severity: Severity::Error, - source: "system.replicas".to_string(), - component: component.clone(), - error_type: "replication_stopped".to_string(), - message: format!( - "Replica is in read-only mode with {} items in queue. Replication may be stopped.", - queue_size - ), - details, - suggested_action: format!( - "Investigate why replica is read-only. 
Try restarting replication: 'SYSTEM START REPLICATION QUEUES {}.{}'", - config.db_name, component.name - ), - related_queries: vec![ - format!( - "SELECT * FROM system.replicas WHERE database = '{}' AND table = '{}'", - config.db_name, component.name - ), - format!( - "SYSTEM START REPLICATION QUEUES {}.{}", - config.db_name, component.name - ), - ], - }); - } - } - } - } - - Ok(issues) - } -} diff --git a/apps/framework-cli/src/mcp/tools/query_olap.rs b/apps/framework-cli/src/mcp/tools/query_olap.rs index 517f2a702f..203389a151 100644 --- a/apps/framework-cli/src/mcp/tools/query_olap.rs +++ b/apps/framework-cli/src/mcp/tools/query_olap.rs @@ -3,13 +3,13 @@ //! This module implements the MCP tool for executing read-only SQL queries //! against the ClickHouse OLAP database for data exploration and debugging. -use log::{debug, info}; use rmcp::model::{Annotated, CallToolResult, RawContent, RawTextContent, Tool}; use serde_json::{json, Map, Value}; use sqlparser::ast::Statement; use sqlparser::dialect::ClickHouseDialect; use sqlparser::parser::Parser; use std::sync::Arc; +use tracing::{debug, info}; use crate::infrastructure::olap::clickhouse::client::ClickHouseClient; use crate::infrastructure::olap::clickhouse::config::ClickHouseConfig; diff --git a/apps/framework-cli/src/mcp/tools/sample_stream.rs b/apps/framework-cli/src/mcp/tools/sample_stream.rs index 0ebf6f9b91..79f36cb2e2 100644 --- a/apps/framework-cli/src/mcp/tools/sample_stream.rs +++ b/apps/framework-cli/src/mcp/tools/sample_stream.rs @@ -4,7 +4,6 @@ //! It provides functionality to retrieve recent messages from topics for debugging and exploration. use futures::stream::BoxStream; -use log::info; use rdkafka::consumer::Consumer; use rdkafka::{Message as KafkaMessage, Offset, TopicPartitionList}; use rmcp::model::{CallToolResult, Tool}; @@ -12,6 +11,7 @@ use serde_json::{json, Map, Value}; use std::sync::Arc; use std::time::Duration; use tokio_stream::StreamExt; +use tracing::info; use super::{create_error_result, create_success_result}; use crate::framework::core::infrastructure_map::InfrastructureMap; @@ -248,7 +248,7 @@ async fn collect_messages_from_stream( match result { Ok(Ok(value)) => messages.push(value), Ok(Err(e)) => { - log::warn!( + tracing::warn!( "Error deserializing message from stream '{}': {}", stream_name, e @@ -256,7 +256,7 @@ async fn collect_messages_from_stream( error_count += 1; } Err(_elapsed) => { - log::info!( + tracing::info!( "Timeout waiting for messages from stream '{}' after {} seconds. 
Retrieved {} messages.", stream_name, SAMPLE_TIMEOUT_SECS, diff --git a/apps/framework-cli/src/metrics.rs b/apps/framework-cli/src/metrics.rs index e1d57dddad..aea7869ead 100644 --- a/apps/framework-cli/src/metrics.rs +++ b/apps/framework-cli/src/metrics.rs @@ -20,7 +20,7 @@ use crate::metrics_inserter::MetricsInserter; use crate::utilities::constants::{CLI_VERSION, CONTEXT, CTX_SESSION_ID}; use crate::utilities::decode_object; use chrono::{DateTime, Utc}; -use log::{trace, warn}; +use tracing::{trace, warn}; const DEFAULT_ANONYMOUS_METRICS_URL: &str = "https://moosefood.514.dev/ingest/MooseSessionTelemetry/0.6"; diff --git a/apps/framework-cli/src/project.rs b/apps/framework-cli/src/project.rs index 5579373c43..54796a7852 100644 --- a/apps/framework-cli/src/project.rs +++ b/apps/framework-cli/src/project.rs @@ -80,10 +80,10 @@ use crate::utilities::git::GitConfig; use crate::utilities::PathExt; use crate::utilities::_true; use config::{Config, ConfigError, Environment, File}; -use log::{debug, error}; use python_project::PythonProject; use serde::Deserialize; use serde::Serialize; +use tracing::{debug, error}; /// Represents errors that can occur during project file operations #[derive(Debug, thiserror::Error)] diff --git a/apps/framework-cli/src/utilities.rs b/apps/framework-cli/src/utilities.rs index 0895d5e687..922c8310bd 100644 --- a/apps/framework-cli/src/utilities.rs +++ b/apps/framework-cli/src/utilities.rs @@ -8,6 +8,7 @@ pub mod docker; pub mod dotenv; pub mod git; pub mod identifiers; +pub mod json; pub mod keyring; pub mod machine_id; pub mod nodejs_version; diff --git a/apps/framework-cli/src/utilities/capture.rs b/apps/framework-cli/src/utilities/capture.rs index 5d7f455d1b..5743795a1a 100644 --- a/apps/framework-cli/src/utilities/capture.rs +++ b/apps/framework-cli/src/utilities/capture.rs @@ -65,6 +65,8 @@ pub enum ActivityType { GenerateSDKCommand, #[serde(rename = "peekCommand")] PeekCommand, + #[serde(rename = "queryCommand")] + QueryCommand, #[serde(rename = "workflowCommand")] WorkflowCommand, #[serde(rename = "workflowInitCommand")] @@ -130,7 +132,7 @@ pub fn capture_usage( let client = match PostHog514Client::from_env(machine_id) { Some(client) => client, None => { - log::warn!("PostHog client not configured - missing POSTHOG_API_KEY"); + tracing::warn!("PostHog client not configured - missing POSTHOG_API_KEY"); return; } }; @@ -145,7 +147,7 @@ pub fn capture_usage( ) .await { - log::warn!("Failed to send telemetry to PostHog: {:?}", e); + tracing::warn!("Failed to send telemetry to PostHog: {:?}", e); } })) } diff --git a/apps/framework-cli/src/utilities/docker.rs b/apps/framework-cli/src/utilities/docker.rs index fc792c46de..b22f0c7966 100644 --- a/apps/framework-cli/src/utilities/docker.rs +++ b/apps/framework-cli/src/utilities/docker.rs @@ -1,12 +1,12 @@ use handlebars::Handlebars; use lazy_static::lazy_static; -use log::{error, info, warn}; use regex::Regex; use serde::Deserialize; use serde_json::json; use std::path::PathBuf; use std::process::{Command, Stdio}; use tokio::io::{AsyncBufReadExt, BufReader}; +use tracing::{error, info, warn}; use crate::cli::settings::Settings; use crate::project::Project; diff --git a/apps/framework-cli/src/utilities/dotenv.rs b/apps/framework-cli/src/utilities/dotenv.rs index e43b69be69..a788af905f 100644 --- a/apps/framework-cli/src/utilities/dotenv.rs +++ b/apps/framework-cli/src/utilities/dotenv.rs @@ -34,8 +34,8 @@ //! MOOSE_CLICKHOUSE_CONFIG__PASSWORD=my-secret //! 
``` -use log::{debug, info}; use std::path::Path; +use tracing::{debug, info}; /// Represents the runtime environment for the Moose project #[derive(Debug, Clone, Copy, PartialEq, Eq)] diff --git a/apps/framework-cli/src/utilities/json.rs b/apps/framework-cli/src/utilities/json.rs new file mode 100644 index 0000000000..f32c938fdc --- /dev/null +++ b/apps/framework-cli/src/utilities/json.rs @@ -0,0 +1,179 @@ +//! JSON serialization utilities +//! +//! Provides sorted-key JSON serialization for deterministic output. +//! +//! ## Why sorted keys? +//! +//! Migration files (`remote_state.json`, `local_infra_map.json`, `plan.yaml`) are +//! committed to version control. Without sorted keys, HashMaps serialize in random +//! order, causing noisy diffs even when nothing changed semantically. +//! +//! Rust's `serde_json` doesn't provide native sorted serialization, so we implement +//! it here rather than adding a dependency for this single use case. + +use serde::Serialize; +use serde_json::{Map, Value}; + +/// Recursively sorts all object keys in a JSON value +/// +/// This function traverses a JSON value tree and converts all objects +/// (maps) to use sorted keys. Arrays and primitive values are preserved as-is. +/// +/// # Arguments +/// * `value` - The JSON value to sort +/// +/// # Returns +/// A new JSON value with all object keys sorted alphabetically +pub fn sort_json_keys(value: Value) -> Value { + match value { + Value::Object(map) => { + let mut sorted_map = Map::new(); + // Collect keys and sort them + let mut keys: Vec = map.keys().cloned().collect(); + keys.sort(); + + // Insert values in sorted key order, recursively sorting nested values + for key in keys { + if let Some(val) = map.get(&key) { + sorted_map.insert(key, sort_json_keys(val.clone())); + } + } + Value::Object(sorted_map) + } + Value::Array(arr) => { + // Recursively sort keys in array elements, but don't sort the array itself + Value::Array(arr.into_iter().map(sort_json_keys).collect()) + } + // Primitive values pass through unchanged + other => other, + } +} + +/// Serializes a value to a pretty-printed JSON string with sorted keys +/// +/// This function is a drop-in replacement for `serde_json::to_string_pretty` +/// that ensures all object keys are sorted alphabetically for consistent output. +/// +/// # Arguments +/// * `value` - Any serializable value +/// +/// # Returns +/// A Result containing the pretty-printed JSON string with sorted keys, +/// or a serialization error +/// +/// # Examples +/// ```ignore +/// use crate::utilities::json::to_string_pretty_sorted; +/// +/// let data = MyStruct { ... 
}; +/// let json = to_string_pretty_sorted(&data)?; +/// std::fs::write("output.json", json)?; +/// ``` +pub fn to_string_pretty_sorted(value: &T) -> serde_json::Result { + // First serialize to a JSON value + let json_value = serde_json::to_value(value)?; + + // Sort all keys recursively + let sorted_value = sort_json_keys(json_value); + + // Serialize to pretty string + serde_json::to_string_pretty(&sorted_value) +} + +#[cfg(test)] +mod tests { + use super::*; + use serde_json::json; + + #[test] + fn test_sort_simple_object() { + let input = json!({ + "zebra": 1, + "apple": 2, + "mango": 3 + }); + + let sorted = sort_json_keys(input); + let output = serde_json::to_string(&sorted).unwrap(); + + // Keys should be in alphabetical order + assert!(output.find("apple").unwrap() < output.find("mango").unwrap()); + assert!(output.find("mango").unwrap() < output.find("zebra").unwrap()); + } + + #[test] + fn test_sort_nested_objects() { + let input = json!({ + "outer_z": { + "inner_z": 1, + "inner_a": 2 + }, + "outer_a": { + "inner_z": 3, + "inner_a": 4 + } + }); + + let sorted = sort_json_keys(input); + let output = serde_json::to_string(&sorted).unwrap(); + + // Outer keys should be sorted + assert!(output.find("outer_a").unwrap() < output.find("outer_z").unwrap()); + } + + #[test] + fn test_arrays_preserve_order() { + let input = json!({ + "items": [ + {"name": "zebra", "id": 1}, + {"name": "apple", "id": 2} + ] + }); + + let sorted = sort_json_keys(input); + + // Array order should be preserved + if let Value::Object(map) = &sorted { + if let Some(Value::Array(items)) = map.get("items") { + assert_eq!(items.len(), 2); + assert_eq!(items[0]["id"], 1); // zebra still first + assert_eq!(items[1]["id"], 2); // apple still second + } else { + panic!("Expected array"); + } + } else { + panic!("Expected object"); + } + } + + #[test] + fn test_to_string_pretty_sorted() { + use serde::Serialize; + use std::collections::HashMap; + + #[derive(Serialize)] + struct TestStruct { + zebra: String, + apple: String, + mango: HashMap, + } + + let mut map = HashMap::new(); + map.insert("z_key".to_string(), 1); + map.insert("a_key".to_string(), 2); + + let test_data = TestStruct { + zebra: "last".to_string(), + apple: "first".to_string(), + mango: map, + }; + + let output = to_string_pretty_sorted(&test_data).unwrap(); + + // apple should appear before zebra in the output + assert!(output.find("apple").unwrap() < output.find("zebra").unwrap()); + + // Nested keys should also be sorted + assert!(output.find("a_key").unwrap() < output.find("z_key").unwrap()); + } +} diff --git a/apps/framework-cli/src/utilities/machine_id.rs b/apps/framework-cli/src/utilities/machine_id.rs index cfc68ec3fb..a7e091d364 100644 --- a/apps/framework-cli/src/utilities/machine_id.rs +++ b/apps/framework-cli/src/utilities/machine_id.rs @@ -1,7 +1,7 @@ use home::home_dir; -use log::warn; use std::fs; use std::path::PathBuf; +use tracing::warn; use uuid::Uuid; const MACHINE_ID_FILE: &str = ".fiveonefour/machine_id"; diff --git a/apps/framework-cli/src/utilities/migration_plan_schema.json b/apps/framework-cli/src/utilities/migration_plan_schema.json index 85ba7da9f4..a9794af48c 100644 --- a/apps/framework-cli/src/utilities/migration_plan_schema.json +++ b/apps/framework-cli/src/utilities/migration_plan_schema.json @@ -201,8 +201,13 @@ } }, "engine": { - "type": ["string", "null"], - "default": null + "anyOf": [ + { "type": "string" }, + { "type": "object" }, + { "type": "null" } + ], + "default": null, + "description": "Table engine 
configuration. Can be a simple string (e.g., 'MergeTree') or an object for complex engines (e.g., ReplicatedMergeTree with parameters)" }, "version": { "anyOf": [ diff --git a/apps/framework-cli/src/utilities/nodejs_version.rs b/apps/framework-cli/src/utilities/nodejs_version.rs index fd7be0e37f..c1824a3802 100644 --- a/apps/framework-cli/src/utilities/nodejs_version.rs +++ b/apps/framework-cli/src/utilities/nodejs_version.rs @@ -1,8 +1,8 @@ -use log::{debug, info, warn}; use semver::{Version, VersionReq}; use serde_json::Value as JsonValue; use std::fs; use std::path::Path; +use tracing::{debug, info, warn}; #[derive(Debug, Clone)] pub struct NodeVersion { diff --git a/apps/framework-cli/src/utilities/package_managers.rs b/apps/framework-cli/src/utilities/package_managers.rs index fa2787face..9d1e87b210 100644 --- a/apps/framework-cli/src/utilities/package_managers.rs +++ b/apps/framework-cli/src/utilities/package_managers.rs @@ -3,7 +3,7 @@ use std::{fmt, path::PathBuf, process::Command}; use home::home_dir; -use log::{debug, error}; +use tracing::{debug, error}; use crate::utilities::constants::{PACKAGE_LOCK_JSON, PNPM_LOCK, YARN_LOCK}; diff --git a/apps/framework-cli/src/utilities/process_output.rs b/apps/framework-cli/src/utilities/process_output.rs index 7e66ea4df2..38661c5d8e 100644 --- a/apps/framework-cli/src/utilities/process_output.rs +++ b/apps/framework-cli/src/utilities/process_output.rs @@ -1,8 +1,8 @@ -use log::{error, info, warn}; use std::io::{BufRead, BufReader as StdBufReader}; use std::process::{Command, Stdio}; use tokio::io::{AsyncBufReadExt, BufReader}; use tokio::process::{ChildStderr, ChildStdout}; +use tracing::{error, info, warn}; /// Utility for safely managing subprocess output while preventing terminal corruption. /// diff --git a/apps/framework-cli/src/utilities/prod-docker-compose.yml.hbs b/apps/framework-cli/src/utilities/prod-docker-compose.yml.hbs index 1dcaf89134..823862a003 100644 --- a/apps/framework-cli/src/utilities/prod-docker-compose.yml.hbs +++ b/apps/framework-cli/src/utilities/prod-docker-compose.yml.hbs @@ -151,6 +151,9 @@ services: {{/if}} - clickhouse-0-logs:/var/log/clickhouse-server/ - clickhouse-0-users:/etc/clickhouse-server/users.d +{{#if clickhouse_clusters_file}} + - "{{clickhouse_clusters_file}}:/etc/clickhouse-server/config.d/clusters.xml:ro" +{{/if}} environment: - CLICKHOUSE_DB=${DB_NAME:-local} - CLICKHOUSE_USER=${CLICKHOUSE_USER:-panda} diff --git a/apps/framework-cli/src/utilities/system.rs b/apps/framework-cli/src/utilities/system.rs index 2fd86998b5..2aebeb6477 100644 --- a/apps/framework-cli/src/utilities/system.rs +++ b/apps/framework-cli/src/utilities/system.rs @@ -1,5 +1,4 @@ //! 
System utilities -use log::{debug, error, info, warn}; use std::fmt::Debug; use std::time::Duration; use std::{ @@ -10,6 +9,7 @@ use tokio::process::Child; use tokio::select; use tokio::task::JoinHandle; use tokio::time::{sleep, Instant}; +use tracing::{debug, error, info, warn}; #[derive(Debug, thiserror::Error)] #[non_exhaustive] @@ -57,7 +57,7 @@ pub async fn kill_child(child: &mut Child) -> Result<(), KillProcessError> { let status = kill.wait().await?; if !status.success() { - log::warn!("Failed to send SIGTERM to process {}", id); + tracing::warn!("Failed to send SIGTERM to process {}", id); } // Wait for the child process to exit with a timeout (10 seconds) diff --git a/apps/framework-docs-v2/.npmrc b/apps/framework-docs-v2/.npmrc deleted file mode 100644 index afab184d37..0000000000 --- a/apps/framework-docs-v2/.npmrc +++ /dev/null @@ -1,6 +0,0 @@ -# Force all dependencies to be hoisted locally to this app's node_modules -# This prevents TypeScript from finding React types in nested node_modules -# This overrides the root .npmrc which prevents hoisting to support multiple React versions -# Since this app only uses React 19, we can safely hoist everything here -shamefully-hoist=true - diff --git a/apps/framework-docs-v2/README.md b/apps/framework-docs-v2/README.md index 51fe19b55a..c9b6722671 100644 --- a/apps/framework-docs-v2/README.md +++ b/apps/framework-docs-v2/README.md @@ -28,6 +28,42 @@ pnpm build pnpm test:snippets ``` +## Environment Variables + +Create a `.env.local` file in the root directory with the following variables: + +```bash +# GitHub API token (optional but recommended) +# Without token: 60 requests/hour rate limit +# With token: 5,000 requests/hour rate limit +GITHUB_TOKEN=your_github_token_here +``` + +### Creating a GitHub Token + +**Option 1: Using GitHub CLI (recommended)** + +If you have the GitHub CLI (`gh`) installed and authenticated: + +```bash +# Get your current GitHub token +gh auth token + +# Or create a new token with specific scopes +gh auth refresh -s public_repo +``` + +Then add the token to your `.env.local` file. + +**Option 2: Using the Web Interface** + +1. Go to https://github.com/settings/tokens +2. Click "Generate new token" → "Generate new token (classic)" +3. Give it a name (e.g., "Moose Docs") +4. Select the `public_repo` scope (or no scopes needed for public repos) +5. Generate and copy the token +6. Add it to your `.env.local` file + ## Structure - `/src/app/typescript` - TypeScript documentation diff --git a/apps/framework-docs-v2/content/guides/applications/automated-reports/guide-overview.mdx b/apps/framework-docs-v2/content/guides/applications/automated-reports/guide-overview.mdx new file mode 100644 index 0000000000..83bede250b --- /dev/null +++ b/apps/framework-docs-v2/content/guides/applications/automated-reports/guide-overview.mdx @@ -0,0 +1,17 @@ +--- +title: Automated Reports Overview +description: Overview of building automated reporting systems with MooseStack +--- + +# Automated Reports Overview + +This guide covers how to build automated reporting systems using MooseStack workflows and APIs. + +## Overview + +Automated reporting systems enable scheduled and event-driven report generation, distribution, and management. + +## Getting Started + +Select a starting point from the sidebar to begin implementing automated reports. 
+ diff --git a/apps/framework-docs-v2/content/guides/applications/going-to-production/guide-overview.mdx b/apps/framework-docs-v2/content/guides/applications/going-to-production/guide-overview.mdx new file mode 100644 index 0000000000..4a678d9e11 --- /dev/null +++ b/apps/framework-docs-v2/content/guides/applications/going-to-production/guide-overview.mdx @@ -0,0 +1,17 @@ +--- +title: Going to Production Overview +description: Overview of preparing and deploying MooseStack applications to production +--- + +# Going to Production Overview + +This guide covers best practices and considerations for deploying MooseStack applications to production environments. + +## Overview + +Deploying to production requires careful planning around infrastructure, monitoring, security, and scalability. + +## Getting Started + +Select a starting point from the sidebar to begin preparing for production deployment. + diff --git a/apps/framework-docs-v2/content/guides/applications/in-app-chat-analytics/guide-overview.mdx b/apps/framework-docs-v2/content/guides/applications/in-app-chat-analytics/guide-overview.mdx new file mode 100644 index 0000000000..2a66499cc5 --- /dev/null +++ b/apps/framework-docs-v2/content/guides/applications/in-app-chat-analytics/guide-overview.mdx @@ -0,0 +1,17 @@ +--- +title: In-App Chat Analytics Overview +description: Overview of implementing analytics for in-app chat features +--- + +# In-App Chat Analytics Overview + +This guide shows you how to implement analytics for in-app chat features using MooseStack. + +## Overview + +Implementing analytics for in-app chat requires tracking messages, user interactions, and engagement metrics in real-time. + +## Getting Started + +Select a starting point from the sidebar to begin implementing in-app chat analytics. + diff --git a/apps/framework-docs-v2/content/guides/applications/performant-dashboards/existing-oltp-db.mdx b/apps/framework-docs-v2/content/guides/applications/performant-dashboards/existing-oltp-db.mdx new file mode 100644 index 0000000000..9e2771e003 --- /dev/null +++ b/apps/framework-docs-v2/content/guides/applications/performant-dashboards/existing-oltp-db.mdx @@ -0,0 +1,36 @@ +--- +title: From Existing OLTP DB +description: Build performant dashboards by connecting to your existing OLTP database +--- + +# From Existing OLTP DB + +This guide walks you through building performant dashboards by connecting to your existing OLTP (Online Transaction Processing) database and creating optimized materialized views for analytics. + +## Overview + +When you have an existing OLTP database (like PostgreSQL, MySQL, or SQL Server), you can leverage MooseStack to create high-performance dashboards without disrupting your production database. This approach involves: + +1. **Connecting** to your existing database +2. **Creating materialized views** that aggregate and pre-compute data +3. **Querying** the materialized views for fast dashboard responses + +## Benefits + +- **No disruption** to your production OLTP database +- **Fast queries** through pre-aggregated materialized views +- **Real-time updates** as data changes in your source database +- **Scalable** architecture that separates transactional and analytical workloads + +## Prerequisites + +Before starting, ensure you have: + +- Access to your existing OLTP database +- Database connection credentials +- A MooseStack project initialized + +## Implementation Steps + +Follow the steps below to implement performant dashboards from your existing OLTP database. 
Each step builds on the previous one, guiding you through the complete setup process. + diff --git a/apps/framework-docs-v2/content/guides/applications/performant-dashboards/existing-oltp-db/1-setup-connection.mdx b/apps/framework-docs-v2/content/guides/applications/performant-dashboards/existing-oltp-db/1-setup-connection.mdx new file mode 100644 index 0000000000..7437a35590 --- /dev/null +++ b/apps/framework-docs-v2/content/guides/applications/performant-dashboards/existing-oltp-db/1-setup-connection.mdx @@ -0,0 +1,80 @@ +--- +title: Setup Connection +description: Configure connection to your existing OLTP database +--- + +import { LanguageTabs, LanguageTabContent } from "@/components/mdx"; + +# Step 1: Setup Connection + +In this step, you'll configure MooseStack to connect to your existing OLTP database. + +## Overview + +MooseStack needs to connect to your existing database to read data and create materialized views. This connection is configured securely and doesn't require any changes to your production database. + +## Configuration + + + +```typescript filename="moose.config.ts" +import { defineConfig } from "@514labs/moose-cli"; + +export default defineConfig({ + dataSources: { + postgres: { + type: "postgres", + host: process.env.DB_HOST || "localhost", + port: parseInt(process.env.DB_PORT || "5432"), + database: process.env.DB_NAME || "mydb", + user: process.env.DB_USER || "postgres", + password: process.env.DB_PASSWORD || "", + }, + }, +}); +``` + + +```python filename="moose.config.py" +from moose_cli import define_config + +config = define_config( + data_sources={ + "postgres": { + "type": "postgres", + "host": "localhost", + "port": 5432, + "database": "mydb", + "user": "postgres", + "password": "", + } + } +) +``` + + + +## Environment Variables + +For security, store sensitive credentials in environment variables: + +```bash filename=".env" +DB_HOST=your-db-host +DB_PORT=5432 +DB_NAME=your-database +DB_USER=your-username +DB_PASSWORD=your-password +``` + +## Verify Connection + +After configuring the connection, verify it works: + +```bash +moose db ping +``` + +## Next Steps + +Once your connection is configured, proceed to the next step to create materialized views. + diff --git a/apps/framework-docs-v2/content/guides/applications/performant-dashboards/existing-oltp-db/2-create-materialized-view[@scope=initiative].mdx b/apps/framework-docs-v2/content/guides/applications/performant-dashboards/existing-oltp-db/2-create-materialized-view[@scope=initiative].mdx new file mode 100644 index 0000000000..d699bb3515 --- /dev/null +++ b/apps/framework-docs-v2/content/guides/applications/performant-dashboards/existing-oltp-db/2-create-materialized-view[@scope=initiative].mdx @@ -0,0 +1,108 @@ +--- +title: Create Materialized View (Initiative) +description: Create optimized materialized views for initiative-level dashboard queries +--- + +import { LanguageTabs, LanguageTabContent } from "@/components/mdx"; + +# Step 2: Create Materialized View (Initiative Scope) + +In this step, you'll create materialized views that pre-aggregate data from your OLTP database for fast dashboard queries, specifically tailored for initiative-level reporting. + +## Overview + +Materialized views store pre-computed query results, allowing dashboards to load instantly without querying your production OLTP database directly. This step shows you how to define and create these views. 
+ +## Define Materialized View + + + +```typescript filename="dashboard-views.ts" +import { OlapMaterializedView, OlapTable } from "@514labs/moose-lib"; + +// Define the source table (from your OLTP DB) +interface OrdersTable { + id: string; + customer_id: string; + amount: number; + created_at: Date; +} + +// Define the materialized view +interface InitiativeSalesView { + date: Date; + initiative_id: string; + total_sales: number; + order_count: number; +} + +export const initiativeSalesView = new OlapMaterializedView( + "initiative_sales", + { + source: "orders", // References your OLTP table + query: ` + SELECT + toDate(created_at) as date, + initiative_id, + sum(amount) as total_sales, + count(*) as order_count + FROM orders + GROUP BY date, initiative_id + `, + refresh: "incremental", // Update as new data arrives + } +); +``` + + +```python filename="dashboard_views.py" +from moose_lib import OlapMaterializedView +from pydantic import BaseModel +from datetime import date + +# Define the materialized view +class InitiativeSalesView(BaseModel): + date: date + initiative_id: str + total_sales: float + order_count: int + +initiative_sales_view = OlapMaterializedView[InitiativeSalesView]( + "initiative_sales", + source="orders", + query=""" + SELECT + toDate(created_at) as date, + initiative_id, + sum(amount) as total_sales, + count(*) as order_count + FROM orders + GROUP BY date, initiative_id + """, + refresh="incremental", +) +``` + + + +## Apply the View + +Create the materialized view in your database: + +```bash +moose db migrate +``` + +## Query the View + +Once created, you can query the materialized view directly: + +```typescript +const results = await initiativeSalesView.select({ + date: { $gte: new Date("2024-01-01") }, +}); +``` + +## Next Steps + +Your materialized view is now ready! You can use it in your dashboard queries for fast, pre-aggregated data. diff --git a/apps/framework-docs-v2/content/guides/applications/performant-dashboards/existing-oltp-db/2-create-materialized-view[@scope=project].mdx b/apps/framework-docs-v2/content/guides/applications/performant-dashboards/existing-oltp-db/2-create-materialized-view[@scope=project].mdx new file mode 100644 index 0000000000..957905cba8 --- /dev/null +++ b/apps/framework-docs-v2/content/guides/applications/performant-dashboards/existing-oltp-db/2-create-materialized-view[@scope=project].mdx @@ -0,0 +1,105 @@ +--- +title: Create Materialized View +description: Create optimized materialized views for dashboard queries +--- + +import { LanguageTabs, LanguageTabContent } from "@/components/mdx"; + +# Step 2: Create Materialized View + +In this step, you'll create materialized views that pre-aggregate data from your OLTP database for fast dashboard queries. + +## Overview + +Materialized views store pre-computed query results, allowing dashboards to load instantly without querying your production OLTP database directly. This step shows you how to define and create these views. 
+ +## Define Materialized View + + + +```typescript filename="dashboard-views.ts" +import { OlapMaterializedView, OlapTable } from "@514labs/moose-lib"; + +// Define the source table (from your OLTP DB) +interface OrdersTable { + id: string; + customer_id: string; + amount: number; + created_at: Date; +} + +// Define the materialized view +interface DailySalesView { + date: Date; + total_sales: number; + order_count: number; +} + +export const dailySalesView = new OlapMaterializedView( + "daily_sales", + { + source: "orders", // References your OLTP table + query: ` + SELECT + toDate(created_at) as date, + sum(amount) as total_sales, + count(*) as order_count + FROM orders + GROUP BY date + `, + refresh: "incremental", // Update as new data arrives + } +); +``` + + +```python filename="dashboard_views.py" +from moose_lib import OlapMaterializedView +from pydantic import BaseModel +from datetime import date + +# Define the materialized view +class DailySalesView(BaseModel): + date: date + total_sales: float + order_count: int + +daily_sales_view = OlapMaterializedView[DailySalesView]( + "daily_sales", + source="orders", + query=""" + SELECT + toDate(created_at) as date, + sum(amount) as total_sales, + count(*) as order_count + FROM orders + GROUP BY date + """, + refresh="incremental", +) +``` + + + +## Apply the View + +Create the materialized view in your database: + +```bash +moose db migrate +``` + +## Query the View + +Once created, you can query the materialized view directly: + +```typescript +const results = await dailySalesView.select({ + date: { $gte: new Date("2024-01-01") }, +}); +``` + +## Next Steps + +Your materialized view is now ready! You can use it in your dashboard queries for fast, pre-aggregated data. + diff --git a/apps/framework-docs-v2/content/guides/applications/performant-dashboards/guide-overview.mdx b/apps/framework-docs-v2/content/guides/applications/performant-dashboards/guide-overview.mdx new file mode 100644 index 0000000000..f560deff1a --- /dev/null +++ b/apps/framework-docs-v2/content/guides/applications/performant-dashboards/guide-overview.mdx @@ -0,0 +1,24 @@ +--- +title: Performant Dashboards Overview +description: Overview of building high-performance dashboards with MooseStack +--- + +# Performant Dashboards Overview + +This guide covers best practices for building performant dashboards using MooseStack. + +## Overview + +Building dashboards that load quickly and provide real-time insights requires careful consideration of data modeling, query optimization, and caching strategies. + +## Key Concepts + +- Materialized views for pre-aggregated data +- Efficient query patterns +- Caching strategies +- Real-time data updates + +## Getting Started + +Select a starting point from the sidebar to begin implementing performant dashboards. 
+ diff --git a/apps/framework-docs-v2/content/guides/applications/performant-dashboards/guide.toml b/apps/framework-docs-v2/content/guides/applications/performant-dashboards/guide.toml new file mode 100644 index 0000000000..b230fac9eb --- /dev/null +++ b/apps/framework-docs-v2/content/guides/applications/performant-dashboards/guide.toml @@ -0,0 +1,49 @@ +id = "performant-dashboards" +title = "Performant Dashboards Guide" + +[[options]] +id = "scope" +label = "Scope" +type = "select" + [[options.values]] + id = "initiative" + label = "Initiative" + + [[options.values]] + id = "project" + label = "Project" + +[[options]] +id = "starting-point" +label = "Starting Point" +type = "select" + [[options.values]] + id = "existing-oltp" + label = "Existing Postgres/MySQL" + + [[options.values]] + id = "scratch" + label = "From Scratch" + +[[options]] +id = "lang" +label = "Language" +type = "select" + [[options.values]] + id = "ts" + label = "TypeScript" + + [[options.values]] + id = "python" + label = "Python" + +# Flow Definitions +# Keys match the 'starting-point' values (primary branching logic) +[flows.existing-oltp] +stepsDir = "existing-oltp-db" +title = "Connect Existing DB" + +[flows.scratch] +stepsDir = "scratch" +title = "Build from Scratch" + diff --git a/apps/framework-docs-v2/content/guides/data-management/change-data-capture/debezium-dev-to-prod-outline.mdx b/apps/framework-docs-v2/content/guides/data-management/change-data-capture/debezium-dev-to-prod-outline.mdx new file mode 100644 index 0000000000..27583ea855 --- /dev/null +++ b/apps/framework-docs-v2/content/guides/data-management/change-data-capture/debezium-dev-to-prod-outline.mdx @@ -0,0 +1,335 @@ +--- +title: Stream Data from Postgres with Debezium +description: Learn how to adapt the Debezium CDC template to stream data from your PostgreSQL database to ClickHouse. +--- + +import { FileTree } from "@/components/mdx"; + +# Stream Data from Postgres with Debezium + +This guide shows you how to use the [**Debezium CDC Template**](https://github.com/514-labs/debezium-cdc). You will learn how to set up the Debezium connector with your PostgreSQL database and mirror your data into ClickHouse in real-time. + +## Architecture Overview + +At a high level, the pipeline works like this: +```txt +PostgreSQL -> Kafka -> ClickHouse +``` + +* **Debezium** acts as the bridge between PostgreSQL and Kafka. It watches for changes in your database and publishes them to Kafka topics. +* **MooseStack** acts as the bridge between Kafka and ClickHouse. It serves as your "pipeline-as-code" layer where you define your ClickHouse tables, your Kafka streams, and the transformation logic that connects them. + +This template uses two Kafka topics for each table: one for the raw data and one for the clean, processed data. The data flow is as follows: + +1. Change happens in PostgreSQL +2. Debezium publishes the change to Kafka (auto-creating a topic for each table) +3. Raw events are consumed from each Debezium-managed topic and transformed into a format that can be stored in ClickHouse +4. The transformed data is published to a second Moose Stream (the sink stream) +5. Data from the sink stream is synced into your ClickHouse table +6. 
Rows are deduplicated and versioned in the background in ClickHouse + +## Project Structure + +Here are the key files in the template you should know about: + + + + + + + + + + + + +* **`cdc-pipeline/1-sources/`**: Defines Kafka topics from Debezium +* **`cdc-pipeline/2-transforms/`**: Sanitizes CDC events and maps them to the destination +* **`cdc-pipeline/3-destinations/`**: Defines ClickHouse tables and streams +* **`docker-compose.dev.override.yaml`**: Infrastructure configuration (Kafka Connect, Redpanda) +* **`setup-cdc.ts`**: Script that registers the Debezium connector +* **`moose.config.toml`**: Project configuration (enables streaming) + +## Step 0: Clone the Template + +Make sure you clone the [Debezium CDC Template](https://github.com/514-labs/debezium-cdc) and install the dependencies: + +```bash +git clone https://github.com/514-labs/debezium-cdc.git +cd debezium-cdc +pnpm install +``` + +## Step 1: Configure Your Environment + +The template uses environment variables for database passwords and connector settings. + +1. Copy the `.env.example` file: + + ```bash + cp .env.example .env.dev + ``` + +2. Open `.env.dev` and customize the values for your environment. + + **Database Connection:** + Set these to point to your source PostgreSQL database. + ```properties + DB_HOST=your_postgres_host + DB_PORT=your_postgres_port + DB_NAME=your_postgres_db + DB_USER=your_postgres_user + DB_PASSWORD=your_postgres_password + ``` + + **CDC Configuration:** + Choose which tables you want to capture. + ```properties + # List of tables to capture (schema.table), separated by commas + CDC_TABLE_INCLUDE_LIST=public.* + + # Prefix for the Kafka topics (default: pg-cdc) + CDC_TOPIC_PREFIX=pg-cdc + ``` + +## Step 2: Prepare Your Database + +Debezium needs PostgreSQL's logical replication to work. + +1. **Check `wal_level`**: + Run this SQL command on your source database: + ```sql + SHOW wal_level; + ``` + It must be `logical`. If not, update your `postgresql.conf` and restart Postgres. + +2. **Create a Replication User**: + It is best to use a separate user for this. Run these commands: + ```sql + CREATE USER cdc_user WITH PASSWORD 'secure_password'; + ALTER USER cdc_user WITH REPLICATION; + GRANT USAGE ON SCHEMA public TO cdc_user; + GRANT SELECT ON ALL TABLES IN SCHEMA public TO cdc_user; + ``` + (Update your `.env.dev` file with this user's details). + +## Step 3: Start the Pipeline + +Start the development environment. The Moose CLI will start the infrastructure and run a script to register the Debezium connector. + +```bash +moose dev +``` + +Check the logs for these messages: +- Infrastructure starting (Redpanda, Kafka Connect, ClickHouse). +- `setup-cdc.ts` running. +- `✅ Connector registered!` + +Note: Moose does not manage Debezium or Kafka Connect by default. However, this template uses `docker-compose.dev.override.yaml` to add them. The example file starts Kafka Connect and includes a test database. If you want to use your own database, comment out the test database in that file and update `.env.dev`. See [Local Development](/moose/local-dev) for more details. + +## Step 4: Customize the Pipelines for Your Application + +The template comes set up for the provided test database. Follow these steps to change it for your own tables. + +> **Note:** These examples use the `customer_addresses` table from the template. Replace `CustomerAddress` with the names of your own tables (like `Users` or `Orders`). + +### 1. 
Import the Topics
+When the connector runs and a change happens, Debezium automatically creates a topic in Redpanda if it hasn't seen an event for that table before. Since Debezium manages these topics, you need to import their definitions into your project:
+
+```bash
+# Pulls topic definitions into cdc-pipeline/1-sources/externalTopics.ts
+moose-cli kafka pull localhost:19092 --path cdc-pipeline/1-sources
+```
+
+### 2. Define Source Schemas
+Moose imports the raw data streams as generic objects without types. You need to define what your data looks like so that when you transform the raw events you have complete type safety.
+
+#### Option A: Using your ORM Models (Recommended)
+If you already use an ORM like Drizzle, you can reuse your existing models.
+
+The template uses Drizzle, and the models are in `postgres/src/schema.ts`. You can export the inferred type in `cdc-pipeline/oltp/schema.ts`:
+
+```typescript
+import { customerAddresses } from "../../postgres/src/schema";
+
+// Automatically infers: { id: number, first_name: string, ... }
+export type CustomerAddress = typeof customerAddresses.$inferSelect;
+```
+
+Then, in your pipeline code, import the type and apply it to your stream:
+```typescript
+import { CustomerAddress } from "../../oltp/schema";
+
+export const cdcCustomerAddresses = PgCdcPublicCustomerAddressesStream as Stream<
+  GenericCDCEvent<CustomerAddress>
+>;
+```
+
+#### Option B: Using Generation Tools
+If you don't use an ORM, tools like [kanel](https://github.com/kristiandupont/kanel) or `pg-to-ts` can generate TypeScript interfaces from your database for you.
+
+```bash
+# Example with kanel
+npx kanel --connectionString $DATABASE_URL --output ./cdc-pipeline/generated-models
+```
+
+### 3. Model the Incoming Data (Create a Typed Topic)
+This step models the raw data coming from Debezium. These events are complex objects that contain metadata, the "before" state of the row, and the "after" state.
+
+`GenericCDCEvent` (in `cdc-pipeline/models.ts`) matches this structure. By wrapping the raw topic with this type, your code knows exactly what the data looks like.
+
+```typescript
+export type GenericCDCEvent<T> = {
+  before: T | null; // The row before the change (null for inserts)
+  after: T | null;  // The row after the change (null for deletes)
+  source: {         // Debezium metadata
+    lsn: number;    // Log Sequence Number (for ordering)
+    ts_ms: number;  // Timestamp of the change
+    table: string;
+  };
+  op: "c" | "u" | "d" | "r"; // Create, Update, Delete, Read
+  ts_ms: number;
+};
+```
+
+Update `cdc-pipeline/1-sources/typed-topics.ts` to export the typed stream.
+
+**Example:**
+
+```typescript
+import { Stream } from "@514labs/moose-lib";
+import { PgCdcPublicCustomerAddressesStream } from "./externalTopics"; // Generated by kafka pull
+import { GenericCDCEvent } from "../models";
+import { CustomerAddress } from "../../oltp/schema";
+
+export const cdcCustomerAddresses = PgCdcPublicCustomerAddressesStream as Stream<
+  GenericCDCEvent<CustomerAddress>
+>;
+```
+
+
+✨ **Suggested Copilot Prompt**
+
+You can use this prompt to tell your AI assistant to generate the typed topics for all your tables at once. Open `cdc-pipeline/1-sources/typed-topics.ts` and ask:
+
+> "Import all the raw stream classes from `./externalTopics.ts` and all the OLTP types from `../../oltp/schema.ts`. For each table, export a new const named `cdc<TableName>` that casts the raw stream to `Stream<GenericCDCEvent<TableType>>`. Follow the pattern of the existing exports."
+
+
+### 4. Model the Destination Data (Flatten the Payload)
+This step models the clean data that goes into ClickHouse.
+
+While the incoming data is nested (Step 3), the destination table should look just like your Postgres table. You need to "flatten" the structure so that `after.id` becomes just `id` in ClickHouse.
+
+You also need to add a few fields (`_is_deleted`, `lsn`, `ts_ms`) to handle updates and deletes correctly.
+
+Update `cdc-pipeline/3-destinations/olap-tables.ts`:
+
+```typescript
+import { OlapTable, ClickHouseEngines, UInt64, UInt8 } from "@514labs/moose-lib";
+import { CustomerAddress } from "../../oltp/schema";
+
+// 1. Define the OLAP Schema
+// Take the fields from Postgres and add metadata
+export type CdcFields = {
+  _is_deleted: UInt8;
+  ts_ms: UInt64;
+  lsn: UInt64;
+};
+
+export type OlapCustomerAddress = CustomerAddress & CdcFields;
+
+// 2. Define the ClickHouse Table
+export const olapCustomerAddresses = new OlapTable<OlapCustomerAddress>(
+  "customer_addresses",
+  {
+    engine: ClickHouseEngines.ReplacingMergeTree,
+    ver: "lsn",
+    isDeleted: "_is_deleted",
+    orderByFields: ["id"],
+  }
+);
+```
+
+You also need a sink stream. This acts as a buffer between your transformation and the final table.
+
+Update `cdc-pipeline/3-destinations/sink-topics.ts`:
+
+```typescript
+import { Stream } from "@514labs/moose-lib";
+import { OlapCustomerAddress } from "../models";
+import { olapCustomerAddresses } from "./olap-tables";
+
+// 3. Define the Destination Stream (The "Processed" Topic)
+export const processedCustomerAddresses = new Stream<OlapCustomerAddress>(
+  "ProcessedCustomerAddresses",
+  { destination: olapCustomerAddresses }
+);
+```
+
+### 5. Create the Transform
+Write the function that maps the Source Stream to the Sink Stream. It cleans the data and converts types where needed.
+
+Create `cdc-pipeline/2-transforms/customer-addresses.ts`:
+
+```typescript
+import { cdcCustomerAddresses } from "../1-sources/typed-topics";
+import { processedCustomerAddresses } from "../3-destinations/sink-topics";
+import { handleCDCPayload } from "./payload-handler"; // Helper from the template
+import { GenericCDCEvent, OlapCustomerAddress } from "../models";
+import { CustomerAddress } from "../../oltp/schema";
+
+// Connect Source Stream -> Destination Stream
+cdcCustomerAddresses.addTransform(
+  processedCustomerAddresses,
+  (message: GenericCDCEvent<CustomerAddress>) => {
+    // Use the helper function to clean the payload
+    const result = handleCDCPayload(message);
+
+    // Return the clean data
+    return result as unknown as OlapCustomerAddress;
+  }
+);
+```
+
+The `handleCDCPayload` function is a helper included in the template. It handles the logic for cleaning the data and managing deletes. You pass it the type of your source row, and it handles the rest.
+
+## Verification
+
+The pipeline is running! Any change in your Postgres `customer_addresses` table will instantly appear in ClickHouse.
+
+Check it by querying ClickHouse with the Moose CLI:
+
+```bash
+moose query "SELECT * FROM customer_addresses"
+```
+
+## Advanced: Optimizing for ClickHouse
+
+The setup above uses your Postgres types directly. To make your analytics faster and cheaper, you should optimize your ClickHouse schema.
+
+This involves using special column types like:
+* **LowCardinality**: For columns with a finite number (10,000 or less) of unique values (e.g. countries, states, etc.).
+* **UInt64**: For IDs and timestamps.
+* **ClickHouseDefault**: To handle empty (null) values efficiently.
+ +Here is a preview of what an optimized schema looks like: + +```typescript +export type OlapCustomerAddress = Omit< + CustomerAddress, + "id" | "country" | "state" | "work_address" +> & + CdcFields & { + // Optimized types + id: UInt64; + country: string & LowCardinality; + state: string & LowCardinality; + work_address: string & ClickHouseDefault<"''">; + }; +``` + +For a full guide on how to optimize your tables, see [Optimizing ClickHouse Schemas](/guides/clickhouse-optimization). + +## Next Steps: Transitioning to Production diff --git a/apps/framework-docs-v2/content/guides/data-management/change-data-capture/guide-overview.mdx b/apps/framework-docs-v2/content/guides/data-management/change-data-capture/guide-overview.mdx new file mode 100644 index 0000000000..05a3182d76 --- /dev/null +++ b/apps/framework-docs-v2/content/guides/data-management/change-data-capture/guide-overview.mdx @@ -0,0 +1,17 @@ +--- +title: Change Data Capture Overview +description: Overview of implementing change data capture (CDC) with MooseStack +--- + +# Change Data Capture Overview + +This guide covers how to implement change data capture (CDC) to track and replicate database changes in real-time using MooseStack. + +## Overview + +Change Data Capture enables real-time synchronization and event-driven architectures by capturing database changes as they occur. + +## Getting Started + +Select a starting point from the sidebar to begin implementing CDC. + diff --git a/apps/framework-docs-v2/content/guides/data-management/impact-analysis/guide-overview.mdx b/apps/framework-docs-v2/content/guides/data-management/impact-analysis/guide-overview.mdx new file mode 100644 index 0000000000..e20651c381 --- /dev/null +++ b/apps/framework-docs-v2/content/guides/data-management/impact-analysis/guide-overview.mdx @@ -0,0 +1,17 @@ +--- +title: Impact Analysis Overview +description: Overview of analyzing the impact of schema changes +--- + +# Impact Analysis Overview + +This guide covers how to analyze the impact of schema changes before applying them. + +## Overview + +Impact analysis helps you understand how schema changes will affect your queries, applications, and downstream systems. + +## Getting Started + +Select a starting point from the sidebar to begin analyzing schema changes. + diff --git a/apps/framework-docs-v2/content/guides/data-management/migrations/guide-overview.mdx b/apps/framework-docs-v2/content/guides/data-management/migrations/guide-overview.mdx new file mode 100644 index 0000000000..26c1088877 --- /dev/null +++ b/apps/framework-docs-v2/content/guides/data-management/migrations/guide-overview.mdx @@ -0,0 +1,17 @@ +--- +title: Migrations Overview +description: Overview of managing database migrations with MooseStack +--- + +# Migrations Overview + +This guide covers best practices for managing database migrations in MooseStack projects. + +## Overview + +Database migrations allow you to evolve your schema safely and track changes over time. + +## Getting Started + +Select a starting point from the sidebar to begin managing migrations. 
+ diff --git a/apps/framework-docs-v2/content/guides/data-warehousing/connectors/guide-overview.mdx b/apps/framework-docs-v2/content/guides/data-warehousing/connectors/guide-overview.mdx new file mode 100644 index 0000000000..7a7c174e2b --- /dev/null +++ b/apps/framework-docs-v2/content/guides/data-warehousing/connectors/guide-overview.mdx @@ -0,0 +1,17 @@ +--- +title: Connectors Overview +description: Overview of using connectors to integrate data sources with MooseStack +--- + +# Connectors Overview + +This guide covers how to use connectors to integrate various data sources with your MooseStack data warehouse. + +## Overview + +Connectors enable seamless integration with databases, APIs, and other data sources. + +## Getting Started + +Select a starting point from the sidebar to begin using connectors. + diff --git a/apps/framework-docs-v2/content/guides/data-warehousing/customer-data-platform/guide-overview.mdx b/apps/framework-docs-v2/content/guides/data-warehousing/customer-data-platform/guide-overview.mdx new file mode 100644 index 0000000000..ca95c8f960 --- /dev/null +++ b/apps/framework-docs-v2/content/guides/data-warehousing/customer-data-platform/guide-overview.mdx @@ -0,0 +1,17 @@ +--- +title: Customer Data Platform Overview +description: Overview of building a customer data platform with MooseStack +--- + +# Customer Data Platform Overview + +This guide shows you how to build a customer data platform (CDP) using MooseStack. + +## Overview + +A customer data platform unifies customer data from multiple sources to create a comprehensive view of each customer. + +## Getting Started + +Select a starting point from the sidebar to begin building your customer data platform. + diff --git a/apps/framework-docs-v2/content/guides/data-warehousing/operational-analytics/guide-overview.mdx b/apps/framework-docs-v2/content/guides/data-warehousing/operational-analytics/guide-overview.mdx new file mode 100644 index 0000000000..48c542f72b --- /dev/null +++ b/apps/framework-docs-v2/content/guides/data-warehousing/operational-analytics/guide-overview.mdx @@ -0,0 +1,17 @@ +--- +title: Operational Analytics Overview +description: Overview of implementing operational analytics with MooseStack +--- + +# Operational Analytics Overview + +This guide covers how to implement operational analytics to monitor and optimize your business operations. + +## Overview + +Operational analytics provides real-time insights into business processes, infrastructure, and application performance. + +## Getting Started + +Select a starting point from the sidebar to begin implementing operational analytics. + diff --git a/apps/framework-docs-v2/content/guides/data-warehousing/pipelines/guide-overview.mdx b/apps/framework-docs-v2/content/guides/data-warehousing/pipelines/guide-overview.mdx new file mode 100644 index 0000000000..cf1c8d1c36 --- /dev/null +++ b/apps/framework-docs-v2/content/guides/data-warehousing/pipelines/guide-overview.mdx @@ -0,0 +1,17 @@ +--- +title: Pipelines Overview +description: Overview of building data pipelines with MooseStack +--- + +# Pipelines Overview + +This guide covers how to build and manage data pipelines using MooseStack to transform, aggregate, and move data through your data warehouse. + +## Overview + +Data pipelines automate the flow of data from source systems to your data warehouse, enabling reliable and scalable data processing. + +## Getting Started + +Select a starting point from the sidebar to begin building data pipelines. 
+ diff --git a/apps/framework-docs-v2/content/guides/data-warehousing/startup-metrics/guide-overview.mdx b/apps/framework-docs-v2/content/guides/data-warehousing/startup-metrics/guide-overview.mdx new file mode 100644 index 0000000000..dbf399285d --- /dev/null +++ b/apps/framework-docs-v2/content/guides/data-warehousing/startup-metrics/guide-overview.mdx @@ -0,0 +1,17 @@ +--- +title: Startup Metrics Overview +description: Overview of tracking key startup metrics with MooseStack +--- + +# Startup Metrics Overview + +This guide covers how to track and analyze key startup metrics using MooseStack. + +## Overview + +Startup metrics help you measure product growth, user engagement, and business performance. + +## Getting Started + +Select a starting point from the sidebar to begin tracking startup metrics. + diff --git a/apps/framework-docs-v2/content/guides/methodology/data-as-code/guide-overview.mdx b/apps/framework-docs-v2/content/guides/methodology/data-as-code/guide-overview.mdx new file mode 100644 index 0000000000..ce655f24ae --- /dev/null +++ b/apps/framework-docs-v2/content/guides/methodology/data-as-code/guide-overview.mdx @@ -0,0 +1,17 @@ +--- +title: Data as Code Overview +description: Overview of implementing data as code practices with MooseStack +--- + +# Data as Code Overview + +This guide covers how to implement data as code practices using MooseStack's code-first approach. + +## Overview + +Data as code enables version control, collaboration, and automated deployment of data infrastructure. + +## Getting Started + +Select a starting point from the sidebar to begin implementing data as code. + diff --git a/apps/framework-docs-v2/content/guides/methodology/dora-for-data/guide-overview.mdx b/apps/framework-docs-v2/content/guides/methodology/dora-for-data/guide-overview.mdx new file mode 100644 index 0000000000..5371fb57d2 --- /dev/null +++ b/apps/framework-docs-v2/content/guides/methodology/dora-for-data/guide-overview.mdx @@ -0,0 +1,17 @@ +--- +title: DORA for Data Overview +description: Overview of implementing DORA metrics for data engineering teams +--- + +# DORA for Data Overview + +This guide shows you how to implement DORA (DevOps Research and Assessment) metrics for data engineering teams. + +## Overview + +DORA metrics help data engineering teams measure and improve their delivery performance. + +## Getting Started + +Select a starting point from the sidebar to begin implementing DORA metrics. + diff --git a/apps/framework-docs-v2/content/guides/strategy/ai-enablement/guide-overview.mdx b/apps/framework-docs-v2/content/guides/strategy/ai-enablement/guide-overview.mdx new file mode 100644 index 0000000000..9fe85df641 --- /dev/null +++ b/apps/framework-docs-v2/content/guides/strategy/ai-enablement/guide-overview.mdx @@ -0,0 +1,17 @@ +--- +title: AI Enablement Overview +description: Overview of enabling AI capabilities with MooseStack +--- + +# AI Enablement Overview + +This guide covers how to enable AI capabilities in your data stack using MooseStack. + +## Overview + +AI enablement requires robust data infrastructure, vector search capabilities, and seamless LLM integration. + +## Getting Started + +Select a starting point from the sidebar to begin enabling AI capabilities. 
+ diff --git a/apps/framework-docs-v2/content/guides/strategy/data-foundation/guide-overview.mdx b/apps/framework-docs-v2/content/guides/strategy/data-foundation/guide-overview.mdx new file mode 100644 index 0000000000..2f278fbdc9 --- /dev/null +++ b/apps/framework-docs-v2/content/guides/strategy/data-foundation/guide-overview.mdx @@ -0,0 +1,17 @@ +--- +title: Data Foundation Overview +description: Overview of building a solid data foundation for your organization +--- + +# Data Foundation Overview + +This guide covers how to build a solid data foundation that enables reliable, scalable, and maintainable data operations. + +## Overview + +A strong data foundation is essential for long-term success with data-driven decision making. + +## Getting Started + +Select a starting point from the sidebar to begin building your data foundation. + diff --git a/apps/framework-docs-v2/content/guides/strategy/olap-evaluation/guide-overview.mdx b/apps/framework-docs-v2/content/guides/strategy/olap-evaluation/guide-overview.mdx new file mode 100644 index 0000000000..881e730a32 --- /dev/null +++ b/apps/framework-docs-v2/content/guides/strategy/olap-evaluation/guide-overview.mdx @@ -0,0 +1,17 @@ +--- +title: OLAP Evaluation Overview +description: Overview of evaluating OLAP databases and systems for your use case +--- + +# OLAP Evaluation Overview + +This guide covers how to evaluate OLAP databases and systems to choose the right solution for your analytical workloads. + +## Overview + +Choosing the right OLAP database requires careful evaluation of performance, scale, and feature requirements. + +## Getting Started + +Select a starting point from the sidebar to begin evaluating OLAP systems. + diff --git a/apps/framework-docs-v2/content/guides/strategy/platform-engineering/guide-overview.mdx b/apps/framework-docs-v2/content/guides/strategy/platform-engineering/guide-overview.mdx new file mode 100644 index 0000000000..1f5cb01229 --- /dev/null +++ b/apps/framework-docs-v2/content/guides/strategy/platform-engineering/guide-overview.mdx @@ -0,0 +1,17 @@ +--- +title: Platform Engineering Overview +description: Overview of implementing platform engineering practices for data infrastructure +--- + +# Platform Engineering Overview + +This guide covers how to implement platform engineering practices to build and maintain self-service data infrastructure. + +## Overview + +Platform engineering enables teams to build and operate data infrastructure more efficiently through self-service tools and automation. + +## Getting Started + +Select a starting point from the sidebar to begin implementing platform engineering practices. + diff --git a/apps/framework-docs-v2/content/moosestack/configuration.mdx b/apps/framework-docs-v2/content/moosestack/configuration.mdx index 28702959eb..2236777903 100644 --- a/apps/framework-docs-v2/content/moosestack/configuration.mdx +++ b/apps/framework-docs-v2/content/moosestack/configuration.mdx @@ -4,7 +4,7 @@ description: Configure your MooseStack project order: 1 --- -import { Callout } from "@/components/mdx"; +import { Callout, FileTree } from "@/components/mdx"; # Project Configuration @@ -117,14 +117,15 @@ MOOSE_
__=value ### Complete Example **File structure:** -``` -my-moose-project/ -├── .env # Base config -├── .env.dev # Dev overrides -├── .env.prod # Prod overrides -├── .env.local # Local secrets (gitignored) -└── moose.config.toml # Structured config -``` + + + + + + + + + **.env** (committed): ```bash @@ -233,6 +234,17 @@ native_port = 9000 # Optional list of additional databases to create on startup (Default: []) # additional_databases = ["analytics", "staging"] +# ClickHouse cluster configuration for replicated tables (optional) +# Define clusters for use with ON CLUSTER DDL operations and distributed tables +# In local dev, Moose creates single-node clusters. In production, names must match your ClickHouse remote_servers config. +# +# Note: Cluster names are deployment directives that control HOW Moose runs DDL (via ON CLUSTER), +# not schema properties. Changing cluster names in your table configs won't trigger table recreation. +# [[clickhouse_config.clusters]] +# name = "default" +# [[clickhouse_config.clusters]] +# name = "my_cluster" + # HTTP server configuration for local development [http_server_config] # Host to bind the webserver to (Default: "localhost") diff --git a/apps/framework-docs-v2/content/moosestack/migrate/apply-planned-migrations-cli.mdx b/apps/framework-docs-v2/content/moosestack/migrate/apply-planned-migrations-cli.mdx new file mode 100644 index 0000000000..05bc3fb0d9 --- /dev/null +++ b/apps/framework-docs-v2/content/moosestack/migrate/apply-planned-migrations-cli.mdx @@ -0,0 +1,70 @@ +--- +title: Serverless (moose migrate) +description: Reference documentation for the manual migration CLI command used in serverless deployments. +order: 7 +category: olap +--- + +import { Callout } from "@/components/mdx"; + +# Serverless (moose migrate) + +The **Serverless** deployment model relies on the `moose migrate` CLI command to execute planned schema changes. This gives you explicit control over when migrations run, which is essential for architectures where Moose is integrated as a library rather than a standalone server. + +## Overview + +In serverless or library-based deployments, Moose does not control the application runtime. Therefore, migrations must be triggered externally, typically as a step in a CI/CD pipeline or a manual administrative action. + +| Feature | Description | +| :--- | :--- | +| **Manual Control** | Migrations run only when you explicitly execute the command. | +| **CI/CD Integration** | Designed to run as a discrete step in deployment pipelines (e.g., GitHub Actions). | +| **Drift Protection** | Validates `remote_state.json` against the target database before execution. | +| **Direct Connection** | Connects directly to ClickHouse using a connection string. | + +## Command Reference + +```bash +moose migrate --clickhouse-url +``` + +### Options + +| Option | Description | Required | +| :--- | :--- | :--- | +| `--clickhouse-url` | The full connection string to the target ClickHouse database (e.g., `clickhouse://user:pass@host:9440/db`). | Yes | + +## Execution Lifecycle + +When `moose migrate` is executed: + +1. **Load Plan:** Reads `migrations/plan.yaml` from the current directory. +2. **Check Database Drift:** Connects to the provided ClickHouse URL and compares the current schema against `remote_state.json`. +3. **Abort on Drift:** If the database state does not match the snapshot, the process exits with an error code. +4. **Execute Migration:** Applies the operations defined in `plan.yaml` sequentially. +5. 
**Report Success:** Exits with code 0 if all operations succeed. + +## Failure Modes + +| Condition | Outcome | Resolution | +| :--- | :--- | :--- | +| **Drift Detected** | Command fails (exit code 1). | Regenerate the plan against the current production DB and retry. | +| **Connection Error** | Command fails (exit code 1). | Check network connectivity and credentials in the connection string. | +| **SQL Error** | Command fails (exit code 1). | Fix the problematic operation in `plan.yaml` or the database state and retry. | + +## CI/CD Example + +This command is typically used in a deployment pipeline before updating the application code. + +```yaml +# Example GitHub Actions step +- name: Apply Migrations + run: moose migrate --clickhouse-url "$CLICKHOUSE_URL" + env: + CLICKHOUSE_URL: ${{ secrets.CLICKHOUSE_URL }} +``` + +## See Also + +- [Planned Migrations](/moosestack/migrate/planned-migrations) - Generating the plan files. +- [Server Runtime](/moosestack/migrate/apply-planned-migrations-service) - The automatic alternative for full server deployments. diff --git a/apps/framework-docs-v2/content/moosestack/migrate/apply-planned-migrations-service.mdx b/apps/framework-docs-v2/content/moosestack/migrate/apply-planned-migrations-service.mdx new file mode 100644 index 0000000000..37a6c4a7d5 --- /dev/null +++ b/apps/framework-docs-v2/content/moosestack/migrate/apply-planned-migrations-service.mdx @@ -0,0 +1,61 @@ +--- +title: Server Runtime +description: Reference documentation for automatic migration execution in the Moose server runtime. +order: 8 +category: olap +--- + +import { Callout } from "@/components/mdx"; + +# Server Runtime + +The **Server Runtime** deployment model (invoked via `moose prod`) includes an automatic migration runner that executes planned changes during the application boot sequence. + +## Overview + +When running as a full server, Moose orchestrates the entire lifecycle of your data stack. Migrations are treated as a prerequisite for starting the application: the server will not accept traffic or process data until the database schema matches the code definition. + +| Feature | Description | +| :--- | :--- | +| **Automatic Execution** | Migrations run automatically when the `moose prod` command starts. | +| **Drift Protection** | The server validates the database state against `remote_state.json` before applying changes. | +| **Code Validation** | The server ensures the deployed code matches `local_infra_map.json` to prevent mismatches. | +| **Zero-Touch** | No separate CLI commands or CI/CD steps are required to apply migrations. | + +## Command Reference + +The migration logic is embedded within the production server start command. + +```bash +moose prod +``` + +**Environment Variables:** +The server requires access to the ClickHouse database, typically configured via `moose.config.toml` or environment variables overridden at runtime. + +## Execution Lifecycle + +When `moose prod` starts, it performs the following sequence: + +1. **Load Plan:** Reads `migrations/plan.yaml` from the deployment artifact. +2. **Check Code Consistency:** Verifies that the running application code matches `local_infra_map.json`. If not, it aborts to prevent deploying code that doesn't match the plan. +3. **Check Database Drift:** Connects to ClickHouse and compares the current schema against `remote_state.json`. If drift is detected, it aborts. +4. **Execute Migration:** Applies the operations defined in `plan.yaml`. +5. 
**Start Services:** Once migrations succeed, the Ingestion API, Consumption API, and Streaming workers are started. + +## Failure Modes + +| Condition | Outcome | Resolution | +| :--- | :--- | :--- | +| **Drift Detected** | Server fails to start. | Regenerate the plan against the current production DB and redeploy. | +| **Plan Mismatch** | Server fails to start. | Ensure the `migrations/` directory matches the code in your deployment artifact. | +| **Migration Error** | Server fails to start. | Fix the schema issue or plan file, then redeploy. | + + +If you are using Boreal Hosting, this process is handled automatically. The platform ensures that your application only becomes healthy once `moose prod` has successfully completed the migration phase. + + +## See Also + +- [Planned Migrations](/moosestack/migrate/planned-migrations) - Generating the plan files. +- [Serverless (moose migrate)](/moosestack/migrate/apply-planned-migrations-cli) - The manual alternative for serverless deployments. diff --git a/apps/framework-docs-v2/content/moosestack/migrate/auto-inferred.mdx b/apps/framework-docs-v2/content/moosestack/migrate/auto-inferred.mdx new file mode 100644 index 0000000000..a97ae10c4c --- /dev/null +++ b/apps/framework-docs-v2/content/moosestack/migrate/auto-inferred.mdx @@ -0,0 +1,108 @@ +--- +title: Auto-Inferred Migrations +description: Reference documentation for Moose's auto-inferred migration system used during local development. +order: 3 +category: olap +--- + +import { Callout } from "@/components/mdx"; + +# Auto-Inferred Migrations + +**Auto-inferred migrations** are a schema evolution mechanism in Moose that automatically detects changes in your data models and applies them to the underlying database in real-time. This system is designed primarily for local development. + +## Command + +Auto-inferred migrations are enabled implicitly when running the development server: + +```bash +moose dev +``` + +## Production Usage + +While technically possible to use auto-inferred migrations in production environments, it is **strongly discouraged**. + + + Auto-inferred migrations will immediately drop columns containing data if a field is renamed or removed in the code. In production, this leads to irreversible data loss. + + +Production deployments should always use [Planned Migrations](/moosestack/migrate/planned-migrations) to ensure schema changes are reviewed, tested, and safe. + +## Behavior + +When active, the auto-inference engine performs the following cycle: + +1. **Monitor:** Watches the file system for changes in exported table definitions in your data model files. +2. **Diff:** Compares the code-defined schema against the actual schema of the running ClickHouse instance. +3. **Generate:** Creates the necessary SQL DDL statements to reconcile the difference. +4. **Apply:** Executes the SQL statements immediately against the database. + +## Operation Reference + +The following table describes how code changes are translated into database operations by the auto-inference engine. + +| Code Change | Database Operation | SQL Equivalent (Approximate) | Data Impact | +| :--- | :--- | :--- | :--- | +| **New Table** | Create Table | `CREATE TABLE ...` | Safe | +| **Add Field** | Add Column | `ALTER TABLE ... ADD COLUMN ...` | Safe | +| **Remove Field** | Drop Column | `ALTER TABLE ... DROP COLUMN ...` | **Destructive** (Data Loss) | +| **Change Field Type** | Modify Column | `ALTER TABLE ... 
MODIFY COLUMN ...` | Potentially Destructive (Cast dependent) | +| **Rename Field** | Drop + Add | `DROP COLUMN old`; `ADD COLUMN new` | **Destructive** (Data Loss - see limitations) | +| **Remove Table** | Drop Table | `DROP TABLE ...` | **Destructive** | + +## Limitations and Safety + +### Renaming Fields + +The auto-inference engine is stateless regarding user intent. It cannot distinguish between **renaming** a field and **deleting one field to add another**. + +If you rename `user_id` to `uid`: +1. Moose sees `user_id` is missing from the code -> Generates `DROP COLUMN user_id`. +2. Moose sees `uid` is new in the code -> Generates `ADD COLUMN uid`. + +**Result:** The column is dropped and re-added empty. Data in the original column is lost immediately. + + + To rename columns without data loss, you must use [Planned Migrations](/moosestack/migrate/planned-migrations) and manually adjust the migration plan to use a `rename` operation instead of `drop` + `add`. + + +### Destructive Operations + +Auto-inferred migrations do not prompt for confirmation before dropping tables or columns. If you comment out a table export or remove a field definition, the corresponding data structure in the database is removed immediately. + +## Configuration + +Auto-inferred migrations rely on the `olap` feature flag in your project configuration. + +```toml filename="moose.config.toml" +[features] +olap = true # enabled by default +``` + +## CLI Output Reference + +The CLI communicates migration actions via standard output prefixes in the terminal. + +| Symbol | Meaning | Description | +| :--- | :--- | :--- | +| `+` | Add | Creating a new table or adding a column. | +| `-` | Remove | Dropping a table or removing a column. | +| `~` | Modify | Changing a column's data type or properties. | + +### Example Output + +```text +⢹ Processing Infrastructure changes from file watcher + ~ Table page_views: + Column changes: + + user_agent: String + - referrer: String + ~ timestamp: DateTime -> DateTime64(3) +``` + +## See Also + +- [Planned Migrations](/moosestack/migrate/planned-migrations) - The reference for production-grade migration workflows. +- [Schema Change Reference](/moosestack/migrate/reference) - Detailed breakdown of migration plan objects. +- [Serverless (moose migrate)](/moosestack/migrate/apply-planned-migrations-cli) - Commands for managing migrations manually. 
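+For illustration, here is a minimal TypeScript sketch of the rename pitfall, reusing the `page_views` columns from the example output above (the interface name and the new `referrerUrl` field are assumptions for the sketch, not part of any template):
+
+```ts
+import { OlapTable } from "@514labs/moose-lib";
+
+// Before: the deployed model
+// interface PageView {
+//   id: string;
+//   referrer: string;
+//   timestamp: Date;
+// }
+
+// After: `referrer` renamed to `referrerUrl` in code
+interface PageView {
+  id: string;
+  referrerUrl: string;
+  timestamp: Date;
+}
+
+export const pageViews = new OlapTable<PageView>("page_views");
+
+// On save, the auto-inference engine only sees the new shape:
+//   - DROP COLUMN referrer      (existing data is lost)
+//   + ADD COLUMN referrerUrl    (created empty)
+// Use a planned migration with a rename operation to keep the data.
+```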
diff --git a/apps/framework-docs-v2/content/moosestack/olap/schema-change.mdx b/apps/framework-docs-v2/content/moosestack/migrate/failed-migrations.mdx similarity index 93% rename from apps/framework-docs-v2/content/moosestack/olap/schema-change.mdx rename to apps/framework-docs-v2/content/moosestack/migrate/failed-migrations.mdx index 1e7e359634..7b10689f44 100644 --- a/apps/framework-docs-v2/content/moosestack/olap/schema-change.mdx +++ b/apps/framework-docs-v2/content/moosestack/migrate/failed-migrations.mdx @@ -1,8 +1,8 @@ --- -title: Handling Failed Migrations +title: Failed Migrations description: Recover from failed migrations and safely achieve desired type changes order: 13 -category: olap +category: migrate --- import { Callout, LanguageTabs, LanguageTabContent } from "@/components/mdx"; @@ -46,25 +46,20 @@ Copy the mutation ID from the terminal logs and run the following command to kil ### Kill the mutation - If you have the `mutation_id`: -```sql filename="ClickHouse" copy -KILL MUTATION WHERE mutation_id = ''; +```bash filename="Terminal" copy +moose query "KILL MUTATION WHERE mutation_id = '';" ``` - If you didn't capture the ID, find it and kill by table: -```sql filename="ClickHouse" copy -SELECT mutation_id, command, is_done, latest_fail_reason -FROM system.mutations -WHERE database = currentDatabase() AND table = '' -ORDER BY create_time DESC; - -KILL MUTATION WHERE database = currentDatabase() AND table = ''; +```bash filename="Terminal" copy +moose query "SELECT mutation_id, command, is_done, latest_fail_reason FROM system.mutations WHERE database = currentDatabase() AND table = '' ORDER BY create_time DESC;" ``` ClickHouse ALTERs are implemented as asynchronous mutations, not transactional. If a mutation fails mid-way, some parts may have been rewritten while others were not, leaving the table partially transformed. The failed mutation also remains queued until you kill it. Clear the mutation first, then proceed. - +{/* Soon, Moose will automatically generate a local DDL plan that kills the mutation and "rolls back" the transformation to the data that was changed before the failure occurred. - + */} ### Revert your code to match the current DB schema diff --git a/apps/framework-docs-v2/content/moosestack/migrate/index.mdx b/apps/framework-docs-v2/content/moosestack/migrate/index.mdx index 28bdfee785..f8e7190ddc 100644 --- a/apps/framework-docs-v2/content/moosestack/migrate/index.mdx +++ b/apps/framework-docs-v2/content/moosestack/migrate/index.mdx @@ -1,376 +1,117 @@ --- -title: Migrations & Planning -description: How Moose handles infrastructure migrations and planning +title: Migrations +description: Understanding how Moose manages database schema changes through code order: 0 category: migrate --- -import { LanguageTabs, LanguageTabContent, Callout } from "@/components/mdx"; +import { Callout } from "@/components/mdx"; -# Moose Migrate +# Migrations -Moose's migration system works like version control for your infrastructure. It automatically detects changes in your code and applies them to your data infrastructure with confidence. +Migrations synchronize your code-defined database schema with your production infrastructure. As your application evolves, you'll add/remove fields, change data types, and restructure tables. Moose Migrate handles these schema changes safely and reliably. 
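+As a minimal illustration (the model and field names below are hypothetical, not from a template), the kind of code change Moose Migrate reconciles looks like this:
+
+```ts
+import { OlapTable } from "@514labs/moose-lib";
+
+interface UserEvent {
+  id: string;
+  name: string;
+  // Added in this release; the deployed table does not have it yet.
+  signedUpAt: Date;
+}
+
+// Moose Migrate diffs this definition against the live ClickHouse table
+// and generates the operation needed to close the gap (an added column).
+export const userEvents = new OlapTable<UserEvent>("user_events");
+```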
- -Moose tracks changes across: -- OLAP Tables and Materialized Views -- Streaming Topics -- API Endpoints -- Workflows - +## How Migrations Work +Moose Migrate operates by comparing two states: -## How It Works +1. **Your code** - Tables and streams defined in your application +2. **Your database** - The actual schema in ClickHouse, Kafka, or Redpanda -Moose collects all objects defined in your main file (`index.ts` for TypeScript or `main.py` for Python) and automatically generates infrastructure operations to match your code: +When these states differ, Moose Migrate generates operations to bring them into alignment. These operations might include: - - +- Adding or dropping tables +- Adding, removing, or renaming columns +- Changing data types +- Creating or modifying streaming topics -```ts file="app/index.ts" -interface UserSchema { - id: string; - name: string; - email: string; -} +The full list of operations is available in the [Migration Plan Format](/moosestack/migrate/plan-format) documentation. -export const usersTable = new OlapTable("Users"); -export const userEvents = new Stream("Users"); -``` +## Core Concepts - - +Migrations in Moose revolve around three key decisions: -```python file="app/main.py" -from pydantic import BaseModel -from moose_lib import OlapTable, Stream +| Concept | What it is | Where to go | +| :--- | :--- | :--- | +| [Lifecycle Management](#lifecycle-management) | Controlling *what* changes are allowed (e.g., preventing data deletion). | [Overview](/moosestack/migrate/lifecycle) • [Fully Managed](/moosestack/migrate/lifecycle-fully-managed) • [Deletion Protected](/moosestack/migrate/lifecycle-deletion-protected) • [Externally Managed](/moosestack/migrate/lifecycle-externally-managed) | +| [Generating Migrations](#generating-migrations) | Deciding *how* changes are generated. | [Overview](/moosestack/migrate/modes) • [Auto-Inferred](/moosestack/migrate/auto-inferred) • [Planned](/moosestack/migrate/planned-migrations) | +| [Applying Changes](#applying-changes-to-production) | The workflow for executing migrations: serverless (`moose migrate`) vs server runtime (`moose prod`). | [Serverless](/moosestack/migrate/apply-planned-migrations-cli) • [Server Runtime](/moosestack/migrate/apply-planned-migrations-service) | -class UserSchema(BaseModel): - id: str - name: str - email: str -users_table = OlapTable[UserSchema]("Users") -user_events = Stream[UserSchema]("Users") -``` +### Lifecycle Management - - - -When you add these objects, Moose automatically creates: -- A ClickHouse table named `Users` with the `UserSchema` -- A Redpanda topic named `Users` with the `UserSchema` - -## Development Workflow - -When running your code in development mode, Moose will automatically hot-reload migrations to your local infrastructure as you save code changes. - -### Quick Start - -Start your development environment: - -```bash filename="Terminal" copy -moose dev -``` - -This automatically: -1. Recursively watches your `/app` directory for code changes -2. Parses objects defined in your main file -3. Compares the new objects with the current infrastructure state Moose stores internally -4. Generates and applies migrations in real-time based on the differences -5. Provides immediate feedback on any errors or warnings -6. 
Updates the internal state of your infrastructure to reflect the new state - -### Example: Adding a New Table +For each table and stream resource defined in your code, you can control *what* changes are allowed (e.g., preventing data deletion, ignoring schema changes, etc.) with the `LifeCycle` configuration property: +```ts +import { OlapTable, LifeCycle } from "@514labs/moose-lib"; -```ts file="app/index.ts" {6} copy -// Before -export const usersTable = new OlapTable("Users"); - -// After (add analytics table) -export const usersTable = new OlapTable("Users"); -export const analyticsTable = new OlapTable("Analytics"); -``` - - - - -```python file="app/main.py" {6} copy -# Before -users_table = OlapTable[UserSchema]("Users") - -# After (add analytics table) -users_table = OlapTable[UserSchema]("Users") -analytics_table = OlapTable[AnalyticsSchema]("Analytics") -``` - - - - -**What happens:** -- Moose detects the new `analyticsTable` object -- Compares: "No Analytics table exists" -- Generates migration: "Create Analytics table" -- Applies migration automatically -- Updates internal state - -In your terminal, you will see a log that shows the new table being created: -```bash -⠋ Processing Infrastructure changes from file watcher - + Table: Analytics Version None - id: String, number: Int64, status: String - - deduplicate: false -``` -### Example: Schema Changes - - - - -```ts file="app/index.ts" {8} copy -import { Key } from "@514labs/moose-lib"; - -// After (add age field) -interface UserSchema { - id: Key; +interface Schema { + id: string; name: string; - email: string; - age: number; // New field + age: number; } + +const table = new OlapTable("table_name", { + lifeCycle: LifeCycle.FULLY_MANAGED + orderByFields: ["id"] +}); ``` +```py +from moose_lib import OlapTable, LifeCycle, OlapConfig +from pydantic import BaseModel -```python file="app/main.py" {8} copy -from moose_lib import Key +class Schema(BaseModel): + id: str + name: str + age: int -# After (add age field) -class UserSchema(BaseModel): - id: Key[str] - name: str - email: str - age: int # New field +table = OlapTable[Schema]("table_name", OlapConfig( + life_cycle=LifeCycle.FULLY_MANAGED + order_by_fields=["id"] +)) ``` - -**What happens:** -- Moose detects the new `age` field -- Generates migration: "Add age column to Users table" -- Applies migration -- Existing rows get NULL/default values - -## Production Workflow +| Option | Behavior | Use When | +| :--- | :--- | :--- | +| [`FULLY_MANAGED`](/moosestack/migrate/lifecycle-fully-managed) (default) | Automatically modifies resources to match your code, including destructive operations. | When you're developing new tables that you want your application to manage and evolve over time. | +| [`DELETION_PROTECTED`](/moosestack/migrate/lifecycle-deletion-protected) | Automatically modifies resources to match your code, but blocks destructive operations (drops, deletions). | When you want to protect critical production tables from accidental data loss. | +| [`EXTERNALLY_MANAGED`](/moosestack/migrate/lifecycle-externally-managed) | Does not modify resources. You manage the schema manually directly in your database. | When you have existing tables that you want to manage outside of your application, or if you're using a Managed CDC service like [ClickPipes](https://clickhouse.com/cloud/clickpipes) or [PeerDB](https://peerdb.io) to manage your schema. | -Moose supports two deployment patterns: **Moose Server** and **Serverless**. 
- -### Moose Server Deployments - -For deployments with a running Moose server, preview changes before applying: - -```bash filename="Terminal" copy -moose plan --url https://your-production-instance --token -``` - - -Remote planning requires authentication: - -1. Generate a token: `moose generate hash-token` -2. Configure your server: -```toml filename="moose.config.toml" copy -[authentication] -admin_api_key = "your-hashed-token" -``` -3. Use the token with `--token` flag - - -**Deployment Flow:** -1. **Develop locally** with `moose dev` -2. **Test changes** in local environment -3. **Plan against production**: `moose plan --url --token ` -4. **Review changes** carefully -5. **Deploy** - Moose applies migrations automatically on startup - -### Serverless Deployments - -For serverless deployments (no Moose server), use the ClickHouse connection directly: - -```bash filename="Terminal" copy -# Step 1: Generate migration files -moose generate migration --clickhouse-url --save - -# Step 2: Preview changes in PR -moose plan --clickhouse-url clickhouse://user:pass@host:port/database - -# Step 3: Execute migration after merge -moose migrate --clickhouse-url -``` - -**Deployment Flow:** -1. **Develop locally** with `moose dev` -2. **Generate migration plan**: `moose generate migration --clickhouse-url --save` -3. **Create PR** with `plan.yaml`, `remote_state.json`, `local_infra_map.json` -4. **PR validation**: Run `moose plan --clickhouse-url ` in CI to preview changes -5. **Review** migration files and plan output -6. **Merge PR** -7. **Execute migration**: Run `moose migrate --clickhouse-url ` in CI/CD - - -Requires `state_config.storage = "clickhouse"` in `moose.config.toml`: -```toml filename="moose.config.toml" copy -[state_config] -storage = "clickhouse" - -[features] -olap = true -data_models_v2 = true -``` - - - -Your ClickHouse instance needs the KeeperMap engine for state storage and migration locking. - -✅ **ClickHouse Cloud**: Works out of the box -✅ **`moose dev` or `moose prod`**: Already configured -⚠️ **Self-hosted ClickHouse**: See [ClickHouse KeeperMap documentation](https://clickhouse.com/docs/en/engines/table-engines/special/keeper-map) for setup requirements - - -### State Storage Options - -Moose migrations require storing infrastructure state and coordinating locks. You can choose between two backends: - -**ClickHouse State Storage (Default)** -Uses the `_MOOSE_STATE` KeeperMap table. Best for: -- ClickHouse Cloud (works out of the box) -- Self-hosted with ClickHouse Keeper already configured - -**Redis State Storage** -Uses Redis for state and locking. Best for: -- Existing Redis infrastructure -- Multi-tenant deployments (isolated by `key_prefix`) -- When ClickHouse Keeper isn't available - -**Configuration:** -```toml filename="moose.config.toml" copy -[state_config] -storage = "redis" # or "clickhouse" (default) -``` - -**Usage with Redis:** -```bash filename="Terminal" copy -# With environment variable (recommended) -export MOOSE_REDIS_CONFIG__URL="redis://host:port" -moose migrate --clickhouse-url clickhouse://... - -# Or with CLI flag -moose migrate \ - --clickhouse-url clickhouse://... \ - --redis-url redis://host:port -``` - - -The ClickHouse URL is always required, even when using Redis for state storage. + +You configure lifecycle modes individually on each `OlapTable` and `Stream` object. This allows you to mix fully managed development tables with deletion-protected production tables and externally managed legacy resources in the same application. 
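+For example, a single project can mix all three modes. The sketch below is illustrative (the table names and schemas are hypothetical) and uses the same configuration shape shown above:
+
+```ts
+import { OlapTable, LifeCycle } from "@514labs/moose-lib";
+
+interface Experiment { id: string; variant: string; }
+interface Order { id: string; total: number; }
+interface LegacyUser { userId: string; email: string; }
+
+// New table under active development: Moose may apply any change, including drops
+const experiments = new OlapTable<Experiment>("experiments", {
+  lifeCycle: LifeCycle.FULLY_MANAGED,
+});
+
+// Critical production table: schema can evolve, but nothing is ever dropped
+const orders = new OlapTable<Order>("orders", {
+  lifeCycle: LifeCycle.DELETION_PROTECTED,
+});
+
+// Table owned by another team or a CDC service: Moose never alters it remotely
+const legacyUsers = new OlapTable<LegacyUser>("legacy_users", {
+  lifeCycle: LifeCycle.EXTERNALLY_MANAGED,
+});
+```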
-### Understanding Plan Output - -Moose shows exactly what will change: - -```bash - + Table: Analytics Version None - id: String, number: Int64, status: String - - deduplicate: false - + Table: Users Version None - id: String, name: String, email: String - - deduplicate: false -``` - -## Migration Types - -| Change Type | Infrastructure Impact | Data Impact | -|-------------|----------------------|-------------| -| **Add new object** | New table/stream/API created | No impact | -| **Remove object** | Table/stream/API dropped | All data lost | -| **Add field** | New column created | Existing rows get NULL/default | -| **Remove field** | Column dropped | Data permanently lost | -| **Change type** | Column altered | Data converted if compatible | +[Compare Lifecycle Management Modes →](/moosestack/migrate/lifecycle) -For detailed examples of each migration type, see [Migration Types](/moosestack/migrate/migration-types). +### Generating Migrations -## Viewing Infrastructure State +Moose Migrate provides two complementary ways to generate migrations. Each is designed for use in different stages of the application lifecycle, and it's best practice to use both in your workflow: -### Via CLI -```bash -# Check current infrastructure objects -moose ls +| Option | Behavior | Use Case | +| :--- | :--- | :--- | +| [Auto-Inferred](/moosestack/migrate/auto-inferred) | Updates database instantly on file save. Fast iteration, but can be destructive. | Local development, fast prototyping | +| [Planned](/moosestack/migrate/planned-migrations) | Generates reviewable plan files. Safe, deterministic, with drift detection. | Production deployment, CI/CD | -# View migration logs -moose logs -``` - -### Via Direct Connection - -Connect to your local infrastructure using details from `moose.config.toml`: - -```toml file="moose.config.toml" -[features] -olap = true # ClickHouse for analytics -streaming_engine = true # Redpanda for streaming -workflows = false # Temporal for workflows - -[clickhouse_config] -host = "localhost" -host_port = 18123 -native_port = 9000 -db_name = "local" -user = "panda" -password = "pandapass" - -[redpanda_config] -broker = "localhost:19092" -message_timeout_ms = 1000 -retention_ms = 30000 -replication_factor = 1 -``` -## Best Practices +[Compare Migration Modes →](/moosestack/migrate/modes) -### Development -- Use `moose dev` for all local development -- Monitor plan outputs for warnings -- Test schema changes with sample data - -### Production -- Always use remote planning before deployments -- Review changes carefully in production plans -- Maintain proper authentication -- Test migrations in staging first - -### Managing TTL Outside Moose - -If you're managing ClickHouse TTL settings through other tools or want to avoid migration failures from TTL drift, you can configure Moose to ignore TTL changes: - -```toml filename="moose.config.toml" copy -[migration_config] -ignore_operations = ["ModifyTableTtl", "ModifyColumnTtl"] -``` +### Applying Changes -This tells Moose to: -- Skip generating TTL change operations in migration plans -- Ignore TTL differences during drift detection +You have two options for executing planned migrations against your production database: -You'll still get migrations for all other schema changes (adding tables, modifying columns, etc.), but TTL changes won't block your deployments. 
+| Mode | Behavior | Use Case | +| :--- | :--- | :--- | +| [Serverless (`moose migrate`)](/moosestack/migrate/apply-planned-migrations-cli) | You run migrations manually or via CI/CD. | Integrating Moose OLAP into an existing application as a library. | +| [Server Runtime (`moose prod`)](/moosestack/migrate/apply-planned-migrations-service) | Migrations are automatically run within the Moose Runtime on server startup. | Building a dedicated analytics service with the full Moose Runtime. | -## Troubleshooting -### Authentication Errors -- Verify your authentication token -- Generate a new token: `moose generate hash-token` -- Check server configuration in `moose.config.toml` +## Advanced Topics -### Migration Issues -- Check `moose logs` for detailed error messages -- Verify object definitions in your main file -- Ensure all required fields are properly typed -- **Stuck migration lock**: If you see "Migration already in progress" but no migration is running, wait 5 minutes for automatic expiry or manually clear it: - ```sql - DELETE FROM _MOOSE_STATE WHERE key = 'migration_lock'; - ``` +- [Failed Migrations](/moosestack/migrate/failed-migrations) - Recover from migration errors diff --git a/apps/framework-docs-v2/content/moosestack/migrate/lifecycle-deletion-protected.mdx b/apps/framework-docs-v2/content/moosestack/migrate/lifecycle-deletion-protected.mdx new file mode 100644 index 0000000000..ac66cdd11f --- /dev/null +++ b/apps/framework-docs-v2/content/moosestack/migrate/lifecycle-deletion-protected.mdx @@ -0,0 +1,70 @@ +--- +title: Deletion Protected Lifecycle +description: Safe lifecycle mode that allows additive changes but prevents destructive operations like drops. +order: 22 +category: migrate +--- + +import { LanguageTabs, LanguageTabContent } from "@/components/mdx"; + +# Deletion Protected Lifecycle + +`LifeCycle.DELETION_PROTECTED` allows Moose to automatically add new database structures but prevents it from removing existing ones. This mode is perfect for production environments where you want to evolve your schema safely without risking data loss. 
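+To make the protection concrete, here is an illustrative sketch (the removed `sessionId` field is hypothetical): deleting a field from a `DELETION_PROTECTED` model leaves the existing column in place instead of dropping it. The full behavior is summarized below.
+
+```ts
+import { OlapTable, LifeCycle } from "@514labs/moose-lib";
+
+// An earlier version of this model also had `sessionId: string`.
+// The field has been removed from the code below.
+interface ProductEvent {
+  id: string;
+  productId: string;
+  timestamp: Date;
+  action: string;
+}
+
+const productAnalytics = new OlapTable<ProductEvent>("product_analytics", {
+  orderByFields: ["timestamp", "productId"],
+  lifeCycle: LifeCycle.DELETION_PROTECTED,
+});
+
+// Adding new fields still generates ADD COLUMN operations as usual,
+// but Moose will not emit a DROP COLUMN for the orphaned `sessionId` column.
+```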
+ +## Behavior + +**What Moose will do:** +- Add new columns, tables +- Modify column types (if compatible) +- Update non-destructive configurations + +**What Moose won't do:** +- Drop columns or tables +- Perform destructive schema changes + +## Examples + + + + +```ts filename="DeletionProtectedExample.ts" copy +import { OlapTable, LifeCycle, ClickHouseEngines } from "@514labs/moose-lib"; + +interface ProductEvent { + id: string; + productId: string; + timestamp: Date; + action: string; +} + +const productAnalytics = new OlapTable("product_analytics", { + orderByFields: ["timestamp", "productId"], + engine: ClickHouseEngines.ReplacingMergeTree, + lifeCycle: LifeCycle.DELETION_PROTECTED +}); +``` + + + + +```py filename="DeletionProtectedExample.py" copy +from moose_lib import OlapTable, OlapConfig, LifeCycle, ClickHouseEngines +from pydantic import BaseModel +from datetime import datetime + +class ProductEvent(BaseModel): + id: str + product_id: str + timestamp: datetime + action: str + +product_analytics = OlapTable[ProductEvent]("product_analytics", OlapConfig( + order_by_fields=["timestamp", "product_id"], + engine=ClickHouseEngines.ReplacingMergeTree, + life_cycle=LifeCycle.DELETION_PROTECTED +)) +``` + + + + diff --git a/apps/framework-docs-v2/content/moosestack/migrate/lifecycle-externally-managed.mdx b/apps/framework-docs-v2/content/moosestack/migrate/lifecycle-externally-managed.mdx new file mode 100644 index 0000000000..881c68c24c --- /dev/null +++ b/apps/framework-docs-v2/content/moosestack/migrate/lifecycle-externally-managed.mdx @@ -0,0 +1,91 @@ +--- +title: Externally Managed Lifecycle +description: Configure Moose to interact with existing, externally managed database schemas. +order: 23 +category: migrate +--- + +import { Callout, LanguageTabs, LanguageTabContent } from "@/components/mdx"; + +# Externally Managed Lifecycle + +`LifeCycle.EXTERNALLY_MANAGED` tells Moose to interact with existing resources without managing their schema or lifecycle. In this mode, you are fully responsible for creating and maintaining the database schema outside the context of your code. + +## Use Cases + +- **Existing Infrastructure**: You're connecting to tables managed by another team or process. +- **Integrations**: You're integrating with external systems like PeerDB or CDC tools that manage the schema. +- **Strict Governance**: Your organization requires strict, manual database change management. + +## Syncing Schema + +Because Moose doesn't manage the schema, your code definitions must match the database exactly. Mismatches can cause runtime errors. 
+ +Use `moose db pull` to generate Moose models from your remote database: + +```bash +moose db pull --clickhouse-url +``` + +## Configuration + + + + +```ts title="ExternallyManagedExample.ts" +import { Stream, OlapTable, LifeCycle, Key } from "@514labs/moose-lib"; + +interface ExternalUserData { + userId: Key; + fullName: string; + emailAddress: string; + createdAt: Date; +} + +// Connect to existing database table +const legacyUserTable = new OlapTable("legacy_users", { + lifeCycle: LifeCycle.EXTERNALLY_MANAGED +}); + +// Connect to existing Kafka topic +const legacyStream = new Stream("legacy_user_stream", { + lifeCycle: LifeCycle.EXTERNALLY_MANAGED, + destination: legacyUserTable +}); +``` + + + + +```py filename="ExternallyManagedExample.py" copy +from moose_lib import Stream, OlapTable, OlapConfig, StreamConfig, LifeCycle, Key +from pydantic import BaseModel +from datetime import datetime + +class ExternalUserData(BaseModel): + user_id: Key[str] + full_name: str + email_address: str + created_at: datetime + +# Connect to existing database table +legacy_user_table = OlapTable[ExternalUserData]("legacy_users", OlapConfig( + life_cycle=LifeCycle.EXTERNALLY_MANAGED +)) + +# Connect to existing Kafka topic +legacy_stream = Stream[ExternalUserData]("legacy_user_stream", StreamConfig( + life_cycle=LifeCycle.EXTERNALLY_MANAGED, + destination=legacy_user_table +)) +``` + + + + + +`moose dev` **WILL CREATE** `EXTERNALLY_MANAGED` tables in your local ClickHouse instance to enable development of queries and views against your schema. + +- **Local Updates**: Schema changes in code **WILL** update your local database. +- **No Remote Impact**: These changes are **NEVER** applied to the remote database. + diff --git a/apps/framework-docs-v2/content/moosestack/migrate/lifecycle-fully-managed.mdx b/apps/framework-docs-v2/content/moosestack/migrate/lifecycle-fully-managed.mdx new file mode 100644 index 0000000000..48114fa432 --- /dev/null +++ b/apps/framework-docs-v2/content/moosestack/migrate/lifecycle-fully-managed.mdx @@ -0,0 +1,70 @@ +--- +title: Fully Managed Lifecycle +description: Default lifecycle mode where Moose automatically manages all schema changes, including destructive ones. +order: 21 +category: migrate +--- + +import { Callout, LanguageTabs, LanguageTabContent } from "@/components/mdx"; + +# Fully Managed Lifecycle + +`LifeCycle.FULLY_MANAGED` is the default behavior where Moose has complete control over your database resources. When you change your data models, Moose will automatically: + +- Add new columns or tables +- Remove columns or tables that no longer exist in your code +- Modify existing column types and constraints + + +This mode can perform destructive operations. Data may be lost if you remove fields from your data models or if you perform operations that require a destroy and recreate to be effective, like changing the `order_by_fields` (Python) or `orderByFields` (TypeScript) field. 
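+As an illustration of the second case above (hypothetical model), changing the sorting key of an existing table cannot be applied in place, so under `FULLY_MANAGED` it results in a destroy-and-recreate:
+
+```ts
+import { OlapTable, LifeCycle } from "@514labs/moose-lib";
+
+interface PageView {
+  id: string;
+  path: string;
+  timestamp: Date;
+}
+
+// Previously deployed with orderByFields: ["id"].
+// Changing the sorting key requires Moose to drop and recreate the table,
+// so the recreated table starts empty.
+const pageViews = new OlapTable<PageView>("page_views", {
+  orderByFields: ["timestamp", "id"],
+  lifeCycle: LifeCycle.FULLY_MANAGED,
+});
+```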
+ + +## Examples + + + + +```ts filename="FullyManagedExample.ts" copy +import { OlapTable, LifeCycle } from "@514labs/moose-lib"; + +interface UserData { + id: string; + name: string; + email: string; +} + +// Default behavior - fully managed +const userTable = new OlapTable("users"); + +// Explicit fully managed configuration +const explicitTable = new OlapTable("users", { + orderByFields: ["id"], + lifeCycle: LifeCycle.FULLY_MANAGED +}); +``` + + + + +```py filename="FullyManagedExample.py" copy +from moose_lib import OlapTable, OlapConfig, LifeCycle +from pydantic import BaseModel + +class UserData(BaseModel): + id: str + name: str + email: str + +# Default behavior - fully managed +user_table = OlapTable[UserData]("users") + +# Explicit fully managed configuration +explicit_table = OlapTable[UserData]("users", OlapConfig( + order_by_fields=["id"], + life_cycle=LifeCycle.FULLY_MANAGED +)) +``` + + + + diff --git a/apps/framework-docs-v2/content/moosestack/migrate/lifecycle.mdx b/apps/framework-docs-v2/content/moosestack/migrate/lifecycle.mdx index fbc537281a..fd15d50807 100644 --- a/apps/framework-docs-v2/content/moosestack/migrate/lifecycle.mdx +++ b/apps/framework-docs-v2/content/moosestack/migrate/lifecycle.mdx @@ -1,75 +1,37 @@ --- title: Lifecycle Management -description: Control how Moose manages database and streaming resources when your code changes +description: Reference documentation for the LifeCycle enum configuration property on OlapTable and Stream objects. order: 2 category: migrate --- -import { Callout, BulletPointsCard, LanguageTabs, LanguageTabContent } from "@/components/mdx"; +import { BulletPointsCard, LanguageTabs, LanguageTabContent } from "@/components/mdx"; # Lifecycle Management - - - - - -## Overview +The `LifeCycle` enum is a configuration property that controls how Moose Migrate manages individual `OlapTable` and `Stream` resources during schema evolution. Each resource can have its own lifecycle mode, enabling hybrid management models within a single application. -The `LifeCycle` enum controls how Moose manages the lifecycle of database/streaming resources when your code changes. -This feature gives you fine-grained control over whether Moose automatically updates your database schema or -leaves it under external/manual control. +## Lifecycle Modes - +| Mode | Behavior | Default | +| :--- | :--- | :--- | +| `FULLY_MANAGED` | Moose automatically modifies resources to match your code, including destructive operations (drops, deletions). | Yes (for new resources) | +| `DELETION_PROTECTED` | Moose modifies resources to match your code but blocks destructive operations (drops, deletions). | No | +| `EXTERNALLY_MANAGED` | Moose does not modify resources. You are responsible for managing the schema manually. | No | -## LifeCycle Modes +## Configuration Syntax -### `FULLY_MANAGED` (Default) +The `lifeCycle`/`life_cycle` property is set in the configuration object when creating `OlapTable` or `Stream` instances. -This is the default behavior where Moose has complete control over your database resources. When you change your data models, Moose will automatically: - -- Add new columns or tables -- Remove columns or tables that no longer exist in your code -- Modify existing column types and constraints - - -This mode can perform destructive operations. 
Data may be lost if you remove fields from your data models or if you perform operations that require a destroy and recreate to be effective, like changing the `order_by_fields` (Python) or `orderByFields` (TypeScript) field. - +### OlapTable Configuration -```ts filename="FullyManagedExample.ts" copy -import { OlapTable, LifeCycle } from "@514labs/moose-lib"; - -interface UserData { - id: string; - name: string; - email: string; -} +```ts +import { OlapTable, OlapConfig, LifeCycle } from "@514labs/moose-lib"; -// Default behavior - fully managed -const userTable = new OlapTable("users"); - -// Explicit fully managed configuration -const explicitTable = new OlapTable("users", { - orderByFields: ["id"], +const table = new OlapTable("table_name", { lifeCycle: LifeCycle.FULLY_MANAGED }); ``` @@ -77,21 +39,10 @@ const explicitTable = new OlapTable("users", { -```py filename="FullyManagedExample.py" copy +```py from moose_lib import OlapTable, OlapConfig, LifeCycle -from pydantic import BaseModel - -class UserData(BaseModel): - id: str - name: str - email: str - -# Default behavior - fully managed -user_table = OlapTable[UserData]("users") -# Explicit fully managed configuration -explicit_table = OlapTable[UserData]("users", OlapConfig( - order_by_fields=["id"], +table = OlapTable[DataType]("table_name", OlapConfig( life_cycle=LifeCycle.FULLY_MANAGED )) ``` @@ -99,141 +50,86 @@ explicit_table = OlapTable[UserData]("users", OlapConfig( -### `DELETION_PROTECTED` - -This mode allows Moose to automatically add new database structures but prevents it from removing existing ones. -Perfect for production environments where you want to evolve your schema safely without risking data loss. - -**What Moose will do:** -- Add new columns, tables -- Modify column types (if compatible) -- Update non-destructive configurations - -**What Moose won't do:** -- Drop columns or tables -- Perform destructive schema changes +### Stream Configuration -```ts filename="DeletionProtectedExample.ts" copy -import { IngestPipeline, LifeCycle } from "@514labs/moose-lib"; - -interface ProductEvent { - id: string; - productId: string; - timestamp: Date; - action: string; -} +```ts +import { Stream, StreamConfig, LifeCycle } from "@514labs/moose-lib"; -const productAnalytics = new IngestPipeline("product_analytics", { - table: { - orderByFields: ["timestamp", "productId"], - engine: ClickHouseEngines.ReplacingMergeTree, - }, - stream: { - parallelism: 4, - }, - ingestApi: true, - // automatically applied to the table and stream - lifeCycle: LifeCycle.DELETION_PROTECTED +const stream = new Stream("stream_name", { + destination: table, + lifeCycle: LifeCycle.FULLY_MANAGED }); ``` -```py filename="DeletionProtectedExample.py" copy -from moose_lib import IngestPipeline, IngestPipelineConfig, OlapConfig, StreamConfig, LifeCycle -from pydantic import BaseModel -from datetime import datetime - -class ProductEvent(BaseModel): - id: str - product_id: str - timestamp: datetime - action: str +```py +from moose_lib import Stream, StreamConfig, LifeCycle -product_analytics = IngestPipeline[ProductEvent]("product_analytics", IngestPipelineConfig( - table=OlapConfig( - order_by_fields=["timestamp", "product_id"], - engine=ClickHouseEngines.ReplacingMergeTree, - ), - stream=StreamConfig( - parallelism=4, - ), - ingest_api=True, - # automatically applied to the table and stream - life_cycle=LifeCycle.DELETION_PROTECTED +stream = Stream[DataType]("stream_name", StreamConfig( + destination=table, + life_cycle=LifeCycle.FULLY_MANAGED )) ``` -### 
`EXTERNALLY_MANAGED` +### IngestPipeline Configuration -This mode tells Moose to completely hands-off your resources. -You become responsible for creating and managing the database schema. This is useful when: - -- You have existing database tables managed by another team -- You're integrating with another system (e.g. PeerDB) -- You have strict database change management processes - - -With externally managed resources, you must ensure your database schema matches your data models exactly, or you may encounter runtime errors. - +For `IngestPipeline`, you can set lifecycle modes independently for the table and stream components. -```ts filename="ExternallyManagedExample.ts" copy -import { Stream, OlapTable, LifeCycle, Key } from "@514labs/moose-lib"; - -interface ExternalUserData { - userId: Key; - fullName: string; - emailAddress: string; - createdAt: Date; -} +```ts +import { IngestPipeline, IngestPipelineConfig, LifeCycle } from "@514labs/moose-lib"; -// Connect to existing database table -const legacyUserTable = new OlapTable("legacy_users", { - lifeCycle: LifeCycle.EXTERNALLY_MANAGED -}); - -// Connect to existing Kafka topic -const legacyStream = new Stream("legacy_user_stream", { - lifeCycle: LifeCycle.EXTERNALLY_MANAGED, - destination: legacyUserTable +const pipeline = new IngestPipeline("pipeline_name", { + table: { + lifeCycle: LifeCycle.DELETION_PROTECTED + }, + stream: { + lifeCycle: LifeCycle.FULLY_MANAGED + } }); ``` -```py filename="ExternallyManagedExample.py" copy -from moose_lib import Stream, OlapTable, OlapConfig, StreamConfig, LifeCycle, Key -from pydantic import BaseModel -from datetime import datetime - -class ExternalUserData(BaseModel): - user_id: Key[str] - full_name: str - email_address: str - created_at: datetime - -# Connect to existing database table -legacy_user_table = OlapTable[ExternalUserData]("legacy_users", OlapConfig( - life_cycle=LifeCycle.EXTERNALLY_MANAGED -)) +```py +from moose_lib import IngestPipeline, IngestPipelineConfig, OlapConfig, StreamConfig, LifeCycle -# Connect to existing Kafka topic -legacy_stream = Stream[ExternalUserData]("legacy_user_stream", StreamConfig( - life_cycle=LifeCycle.EXTERNALLY_MANAGED, - destination=legacy_user_table +pipeline = IngestPipeline[DataType]("pipeline_name", IngestPipelineConfig( + table=OlapConfig( + life_cycle=LifeCycle.DELETION_PROTECTED + ), + stream=StreamConfig( + life_cycle=LifeCycle.FULLY_MANAGED + ) )) ``` + +## Use Cases + +| Scenario | Recommended Mode | Rationale | +| :--- | :--- | :--- | +| Development/iteration | `FULLY_MANAGED` | Allows rapid schema changes including destructive operations. | +| Production tables | `DELETION_PROTECTED` | Prevents accidental data loss while allowing schema evolution. | +| Legacy/shared tables | `EXTERNALLY_MANAGED` | Tables managed by another team or system. | +| CDC-managed streams | `EXTERNALLY_MANAGED` | Topics created by ClickPipes, PeerDB, or other CDC services. | +| Moose-managed streams | `FULLY_MANAGED` | Topics created and managed by Moose. 
| + +## See Also + +- [Fully Managed](/moosestack/migrate/lifecycle-fully-managed) - Default lifecycle mode +- [Deletion Protected](/moosestack/migrate/lifecycle-deletion-protected) - Safe production mode +- [Externally Managed](/moosestack/migrate/lifecycle-externally-managed) - Read-only mode diff --git a/apps/framework-docs-v2/content/moosestack/migrate/modes.mdx b/apps/framework-docs-v2/content/moosestack/migrate/modes.mdx new file mode 100644 index 0000000000..3a266a5604 --- /dev/null +++ b/apps/framework-docs-v2/content/moosestack/migrate/modes.mdx @@ -0,0 +1,63 @@ +--- +title: Generating Migrations +description: Overview of the two schema evolution modes in Moose used for development and production. +order: 1 +category: migrate +--- + +import { Callout } from "@/components/mdx"; + +# Generating Migrations + +Moose Migrate offers two complementary modes for generating migrations. Each is designed for use in different stages of the application lifecycle, and it's best practice to use both in your workflow: + +- [Auto-Inferred Migrations](#auto-inferred-migrations) - Changes are instantly applied to your database without any manual review. Designed for local development. +- [Planned Migrations](#planned-migrations) - Changes are staged in a reviewable and editable plan file. Designed for production deployments. + +## Overview + +| | Auto-Inferred Migrations | Planned Migrations | +| :--- | :--- | :--- | +| **Primary Use Case** | Local Development | Production Deployment | +| **Trigger** | File Save / Watcher | `moose generate migration` | +| **Artifact** | None (Immediate SQL Execution) | Migration Plan (`plan.yaml`) | +| **Safety** | Low (Optimized for speed) | High (Optimized for safety) | +| **Reviewable** | No | Yes | +| **Drift Detection** | No | Yes | + +## Auto-Inferred Migrations + +Designed for the "inner loop" of development, **Auto-Inferred Migrations** prioritize velocity. As you iterate on your data models in code, Moose automatically applies the necessary changes to your local database in real-time. + +* **Optimized for:** Prototyping, local development, rapid iteration. +* **Key Characteristic:** Invisible, instant schema updates. +* **Risk:** Can be destructive (e.g., renaming a field drops the column). + +[Read the Auto-Inferred Migrations Reference →](/moosestack/migrate/auto-inferred) + +## Planned Migrations + +Designed for the "outer loop" of deployment, **Planned Migrations** prioritize safety. This mode separates the *generation* of changes from their *execution*, creating a static artifact that can be reviewed, tested, and versioned. + +* **Optimized for:** Production deployments, team collaboration, CI/CD. +* **Key Characteristic:** Reviewable, editable migration plans. +* **Risk:** Minimal (requires explicit approval and handles drift detection). + +[Read the Planned Migrations Reference →](/moosestack/migrate/planned-migrations) + +## Use Cases + +| Scenario | Recommended Mode | Rationale | +| :--- | :--- | :--- | +| **Local Prototyping** | `Auto-Inferred` | You want to iterate quickly on your schema without running commands for every change. | +| **Production Deployment** | `Planned` | You need to ensure that schema changes are safe and do not accidentally delete user data. | +| **CI/CD Pipeline** | `Planned` | You need to apply migrations in a deterministic way as part of your deployment process. | +| **Renaming Columns** | `Planned` | You need to explicitly tell the database to rename a column instead of dropping and re-adding it. 
| +| **Team Review** | `Planned` | You want to review schema changes in a Pull Request before they are applied. | + +## See Also + +- [Auto-Inferred Migrations](/moosestack/migrate/auto-inferred) +- [Planned Migrations](/moosestack/migrate/planned-migrations) +- [Lifecycle Management](/moosestack/migrate/lifecycle) + diff --git a/apps/framework-docs-v2/content/moosestack/migrate/plan-format.mdx b/apps/framework-docs-v2/content/moosestack/migrate/plan-format.mdx new file mode 100644 index 0000000000..a031e69ea3 --- /dev/null +++ b/apps/framework-docs-v2/content/moosestack/migrate/plan-format.mdx @@ -0,0 +1,240 @@ +--- +title: Migration Plan Format +description: Detailed specification of the migration plan.yaml structure, operations, and types. +order: 5 +category: migrate +--- + +import { Callout } from "@/components/mdx"; + +# Migration Plan Format + +The `migrations/plan.yaml` file is the declarative source of truth for pending database changes in Moose. It defines the exact sequence of operations Moose will execute to transition your production database to the desired state. + +This reference documents the file structure, supported operations, and data type definitions found in generated plans. + +## Plan Structure + +A migration plan is a YAML document with the following root properties: + +| Field | Type | Description | +| :--- | :--- | :--- | +| `created_at` | String (ISO 8601) | Timestamp when the plan was generated. Used for versioning and audit trails. | +| `operations` | Array | Ordered list of migration operations to execute. | + +```yaml title="migrations/plan.yaml" copy=false +created_at: 2025-01-14T10:30:00Z +operations: + - DropTableColumn: ... + - AddTableColumn: ... +``` + +## Operation Reference + +Operations are the atomic units of change in a migration plan. Moose executes them sequentially. + +### Table Operations + +#### `CreateTable` + +Creates a new ClickHouse table. + +```yaml title="migrations/plan.yaml" copy=false +- CreateTable: + table: + name: + columns: [] + order_by: [] + engine: +``` + +| Field | Description | +| :--- | :--- | +| `name` | Name of the table to create. | +| `columns` | List of column definitions. | +| `order_by` | Array of column names used for the sorting key. | +| `engine` | ClickHouse table engine (e.g., `MergeTree`, `ReplacingMergeTree`). | + +#### `DropTable` + +Permanently removes a table and all its data. + +```yaml title="migrations/plan.yaml" copy=false +- DropTable: + table: +``` + +| Field | Description | +| :--- | :--- | +| `table` | Name of the table to drop. | + +### Column Operations + +#### `AddTableColumn` + +Adds a new column to an existing table. + +```yaml title="migrations/plan.yaml" copy=false +- AddTableColumn: + table: + column: +``` + +| Field | Description | +| :--- | :--- | +| `table` | Target table name. | +| `column` | Full definition of the new column. | + +#### `DropTableColumn` + +Removes a column from a table. **Destructive operation.** + +```yaml title="migrations/plan.yaml" copy=false +- DropTableColumn: + table: + column_name: +``` + +| Field | Description | +| :--- | :--- | +| `table` | Target table name. | +| `column_name` | Name of the column to remove. | +#### `RenameTableColumn` + +Renames a column while preserving its data. + +```yaml title="migrations/plan.yaml" copy=false +- RenameTableColumn: + table: + before_column_name: + after_column_name: +``` + +| Field | Description | +| :--- | :--- | +| `table` | Target table name. | +| `before_column_name` | Current name of the column. 
| +| `after_column_name` | New name for the column. | + +#### `ModifyTableColumn` + +Changes a column's data type or properties. + +```yaml title="migrations/plan.yaml" copy=false +- ModifyTableColumn: + table: + before_column: + after_column: +``` + +| Field | Description | +| :--- | :--- | +| `table` | Target table name. | +| `before_column` | Snapshot of the column state before modification. | +| `after_column` | Desired state of the column. | + +### SQL Operations + +#### `RawSql` + +Executes arbitrary SQL statements. Used for custom migrations, backfills, or unsupported operations. + +```yaml title="migrations/plan.yaml" copy=false +- RawSql: + sql: [] + description: +``` + +| Field | Description | +| :--- | :--- | +| `sql` | List of SQL statements to execute in order. | +| `description` | Human-readable explanation of the operation's purpose. | + +## Column Definition + +Every column in a `CreateTable`, `AddTableColumn`, or `ModifyTableColumn` operation follows this structure: + +```yaml title="migrations/plan.yaml" copy=false +name: +data_type: +required: +unique: +primary_key: +default: | null +comment: | null +annotations: [[, ]] +``` + +| Property | Type | Description | +| :--- | :--- | :--- | +| `name` | String | Column identifier. | +| `data_type` | Type | ClickHouse data type (see below). | +| `required` | Boolean | If `true`, the column is `NOT NULL`. | +| `unique` | Boolean | (Metadata) Whether the field is marked unique in the model. | +| `primary_key` | Boolean | Whether the field is part of the primary key. | +| `default` | String | Default value expression (e.g., `'active'`, `0`, `now()`). | + +## Data Types + +Moose maps data model types to ClickHouse types in the plan. + +### Scalar Types + +| Type | YAML Representation | +| :--- | :--- | +| **String** | `String` | +| **Boolean** | `Boolean` | +| **Integer** | `Int8`, `Int16`, `Int32`, `Int64`, `UInt8`... | +| **Float** | `Float32`, `Float64` | +| **Date** | `Date`, `Date32`, `DateTime` | +| **UUID** | `UUID` | + +### Complex Types + +#### Nullable +Wraps another type to allow `NULL` values. +```yaml title="migrations/plan.yaml" copy=false +data_type: + Nullable: + nullable: String +``` + +#### Arrays +List of values of a single type. +```yaml title="migrations/plan.yaml" copy=false +data_type: + Array: + elementType: String + elementNullable: false +``` + +#### Enums +Fixed set of string or integer values. +```yaml title="migrations/plan.yaml" copy=false +data_type: + Enum: + name: "Status" + values: + - name: "ACTIVE" + value: { String: "active" } + - name: "INACTIVE" + value: { String: "inactive" } +``` + +#### Nested (Structs) +Hierarchical data structures. +```yaml title="migrations/plan.yaml" copy=false +data_type: + Nested: + name: "Address" + columns: [] + jwt: false +``` + +#### LowCardinality +Storage optimization for columns with few unique values. +```yaml title="migrations/plan.yaml" copy=false +data_type: + LowCardinality: + nullable: String +``` diff --git a/apps/framework-docs-v2/content/moosestack/migrate/planned-migrations.mdx b/apps/framework-docs-v2/content/moosestack/migrate/planned-migrations.mdx new file mode 100644 index 0000000000..fc2e2adfd3 --- /dev/null +++ b/apps/framework-docs-v2/content/moosestack/migrate/planned-migrations.mdx @@ -0,0 +1,122 @@ +--- +title: Planned Migrations +description: Reference documentation for the Planned Migrations system in Moose. 
+order: 6 +category: olap +--- + +import { Callout } from "@/components/mdx"; + +# Planned Migrations + +**Planned migrations** are the production-grade schema evolution mechanism in Moose. Unlike auto-inferred migrations, this system separates the *generation* of schema changes from their *execution*, introducing a reviewable artifact (the plan) into your deployment lifecycle. + +## Command + +Generate a migration plan by comparing your local code against a production environment: + +```bash +# For Server Runtime (connect to Moose Admin API) +moose generate migration --url --token --save + +# For Serverless (connect to ClickHouse directly) +moose generate migration --clickhouse-url --save +``` + +**Key Benefits:** +* **Deterministic:** The plan is a static file (`plan.yaml`) that won't change at runtime. +* **Drift Detection:** Snapshots (`remote_state.json`) ensure the DB hasn't changed since the plan was created. +* **Reviewable:** You can audit every operation (e.g., `DropColumn`, `AddTable`) before it runs, and you can edit the plan to override Moose's assumptions. +* **Versioned:** Commit plans to Git to create a permanent audit trail. + +## Workflow + +The lifecycle consists of four distinct stages: + +1. **Code Change** — Modify your data models (tables, views) in your Moose project. +2. **Generation** — Run the CLI to compare your code against production. + ```bash + moose generate migration --save ... + ``` +3. **Review** — Inspect the generated `migrations/plan.yaml` file and commit it to Git. +4. **Application** — Execute the plan during deployment (if using Moose Runtime), manually via the CLI or your own CI/CD pipeline (if using Serverless). + ```bash + moose migrate ... + + ## or if using the Moose Runtime + moose prod + ``` + +## Generated Artifacts + +Running the generation command produces three files in the `migrations/` directory. + +| File | Purpose | +| :--- | :--- | +| `plan.yaml` | The imperative list of operations (e.g., `AddTableColumn`) to execute. See [Plan Reference](/moosestack/migrate/reference). | +| `remote_state.json` | A snapshot of the production database schema at the time of generation. Used to detect drift. | +| `local_infra_map.json` | A snapshot of your local code's schema definitions. Used to validate the plan against the code. | + +## Configuration + +Planned migrations are enabled via the `ddl_plan` feature flag in your project configuration. + +```toml filename="moose.config.toml" +[features] +olap = true +ddl_plan = true +``` + +## Command Options + +The `moose generate migration` command accepts different arguments depending on your [deployment model for applying changes](/moosestack/migrate#applying-changes). + +### via Moose Runtime (`moose prod`) + +Connect to the **Admin API** of the running service. + +```bash +moose generate migration --url --token --save +``` + +| Option | Description | +| :--- | :--- | +| `--url` | The endpoint of your production Moose Admin API. | +| `--token` | The authentication token for the Admin API. | +| `--save` | Writes the generated plan to the `migrations/` directory. Without this, it performs a dry run. | + +### via Serverless (`moose migrate`) + +Connect directly to the **ClickHouse database**. + +```bash +moose generate migration --clickhouse-url --save +``` + +| Option | Description | +| :--- | :--- | +| `--clickhouse-url` | Direct connection string (e.g., `clickhouse://user:pass@host:9440/db`). | +| `--save` | Writes the generated plan to the `migrations/` directory. Without this, it performs a dry run. 
| + +## Drift Detection + +Drift occurs when the target database's schema changes between the time you generate a plan and the time you apply it. + +**How it works:** +1. `moose generate` writes the current DB state to `remote_state.json`. +2. `moose migrate` (serverless) or `moose prod` (server runtime) compares `remote_state.json` with the *current* DB state. +3. If they differ (hash mismatch), the migration **aborts**. + +**Resolution:** +To fix drift, you must regenerate the plan against the new production state. + +```bash +# Regenerate to accept the new state +moose generate migration ... --save +``` + +## See Also + +- [Migration Plan Reference](/moosestack/migrate/reference) - Detailed syntax of `plan.yaml`. +- [Serverless (moose migrate)](/moosestack/migrate/apply-planned-migrations-cli) - Execution guide. +- [Server Runtime](/moosestack/migrate/apply-planned-migrations-service) - Execution guide. diff --git a/apps/framework-docs-v2/content/moosestack/moose-cli.mdx b/apps/framework-docs-v2/content/moosestack/moose-cli.mdx index 6f03230427..908ee0f3ea 100644 --- a/apps/framework-docs-v2/content/moosestack/moose-cli.mdx +++ b/apps/framework-docs-v2/content/moosestack/moose-cli.mdx @@ -163,6 +163,90 @@ moose peek [--limit ] [--file ] [-t|--table] [-s|--stream] - `-t, --table`: View data from a table (default if neither flag specified) - `-s, --stream`: View data from a stream/topic +### Query +Execute arbitrary SQL queries against your ClickHouse database during development. +```bash +# Direct query +moose query "SELECT count(*) FROM users" + +# From file +moose query -f queries/analysis.sql + +# From stdin +cat query.sql | moose query + +# With limit +moose query "SELECT * FROM events" --limit 100 +``` +- ``: SQL query string to execute (optional if using --file or stdin) +- `-f, --file `: Read query from file +- `-l, --limit `: Maximum rows to return (default: 10000) + +**Requirements:** +- Requires `moose dev` to be running +- Executes queries against your development ClickHouse instance + +**Output:** +- Returns results as newline-delimited JSON +- One JSON object per row +- Row count summary at end + +#### Formatting Queries for Code + +Use the `-c/--format-query` flag to format SQL queries as code literals instead of executing them: + +```bash +# Format as Python (raw string) +moose query -c python "SELECT * FROM users WHERE email REGEXP '[a-z]+'" +# Output: +# r""" +# SELECT * FROM users WHERE email REGEXP '[a-z]+' +# """ + +# Format as TypeScript (template literal) +moose query -c typescript "SELECT * FROM events" +# Output: +# ` +# SELECT * FROM events +# ` + +# Works with file input +moose query -c python -f my_query.sql + +# Prettify SQL before formatting (adds line breaks and indentation) +moose query -c python -p "SELECT id, name FROM users WHERE active = 1 ORDER BY name" +# Output: +# r""" +# SELECT id, name +# FROM users +# WHERE active = 1 +# ORDER BY name +# """ + +# Use heredoc for multi-line SQL queries (no need to escape quotes) +moose query -c python -p < '2024-01-01' +GROUP BY b.id, b.name, b.email +HAVING COUNT(o.id) > 5 +ORDER BY total_spent DESC +LIMIT 50 +EOF + +# Supported languages: python (py), typescript (ts) +# Prettify flag: -p, --prettify (only works with --format-query) +``` + +**Use case:** Iterate on SQL queries in the CLI, then format and paste into your application code without manual escaping. Use `--prettify` to clean up messy one-line queries. 
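
For instance, the formatted TypeScript output can be pasted straight into application code. The sketch below is illustrative only: `runQuery` and `listActiveUsers` are hypothetical stand-ins for whatever ClickHouse client wrapper your application already uses, and the SQL is the prettified example from above.

```ts
// SQL pasted from the output of (illustrative):
//   moose query -c typescript -p "SELECT id, name FROM users WHERE active = 1 ORDER BY name"
const activeUsersQuery = `
SELECT id, name
FROM users
WHERE active = 1
ORDER BY name
`;

// Hypothetical wrapper: swap `runQuery` for your actual ClickHouse client call.
export async function listActiveUsers(
  runQuery: (sql: string) => Promise<unknown[]>,
): Promise<unknown[]> {
  return runQuery(activeUsersQuery);
}
```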
+ ## Generation Commands ### Generate Hash Token diff --git a/apps/framework-docs-v2/content/moosestack/olap/model-table.mdx b/apps/framework-docs-v2/content/moosestack/olap/model-table.mdx index 865fcdc908..584a71f444 100644 --- a/apps/framework-docs-v2/content/moosestack/olap/model-table.mdx +++ b/apps/framework-docs-v2/content/moosestack/olap/model-table.mdx @@ -514,6 +514,131 @@ unsorted = OlapTable[Events]("events_unsorted", OlapConfig( +### Primary Key Expression + +Use a ClickHouse SQL expression to define the primary key explicitly. This is useful when: +- You need functions in the primary key (e.g., `cityHash64(id)`) +- The primary key column ordering should differ from the schema definition +- The primary key should differ from the ORDER BY + +**Important:** When `primaryKeyExpression` is specified, any `Key` annotations on columns are ignored for PRIMARY KEY generation. + + + +```ts filename="PrimaryKeyExpression.ts" copy +import { OlapTable, Key } from "@514labs/moose-lib"; + +// Example 1: Primary key with function +interface UserEvents { + userId: string; + eventId: string; + timestamp: Date; +} + +const eventsTable = new OlapTable("user_events", { + // Use hash function in primary key for better distribution + primaryKeyExpression: "(userId, cityHash64(eventId))", + orderByExpression: "(userId, timestamp)", +}); + +// Example 2: Different ordering in primary key vs columns +interface Product { + category: string; + brand: string; + productId: string; + name: string; +} + +const productsTable = new OlapTable("products", { + // Primary key order optimized for uniqueness + primaryKeyExpression: "productId", + // Order by optimized for common queries + orderByFields: ["category", "brand", "productId"], +}); + +// Example 3: Override Key annotation +interface Record { + id: Key; // This Key annotation will be IGNORED + otherId: string; +} + +const recordTable = new OlapTable("records", { + // This expression overrides the Key annotation + primaryKeyExpression: "(otherId, id)", + orderByExpression: "(otherId, id)", +}); +``` + + +```py filename="PrimaryKeyExpression.py" copy +from moose_lib import OlapTable, OlapConfig, Key +from pydantic import BaseModel +from datetime import datetime + +# Example 1: Primary key with function +class UserEvents(BaseModel): + user_id: str + event_id: str + timestamp: datetime + +events_table = OlapTable[UserEvents]("user_events", OlapConfig( + # Use hash function in primary key for better distribution + primary_key_expression="(user_id, cityHash64(event_id))", + order_by_expression="(user_id, timestamp)", +)) + +# Example 2: Different ordering in primary key vs columns +class Product(BaseModel): + category: str + brand: str + product_id: str + name: str + +products_table = OlapTable[Product]("products", OlapConfig( + # Primary key order optimized for uniqueness + primary_key_expression="(product_id)", + # Order by optimized for common queries + order_by_fields=["category", "brand", "product_id"], +)) + +# Example 3: Override Key[T] annotation +class Record(BaseModel): + id: Key[str] # This Key[T] annotation will be IGNORED + other_id: str + +record_table = OlapTable[Record]("records", OlapConfig( + # This expression overrides the Key[T] annotation + primary_key_expression="(other_id, id)", + order_by_expression="(other_id, id)", +)) +``` + + + +**Rationale for Primary Key Expression:** + +1. 
**Function Support**: Primary keys can use ClickHouse functions like `cityHash64()` for better data distribution, which cannot be expressed through column-level annotations. + +2. **Flexible Ordering**: The ordering of columns in the primary key can be different from the ordering in the schema definition, allowing optimization for both data uniqueness and query patterns. + +3. **Separation of Concerns**: PRIMARY KEY and ORDER BY serve different purposes in ClickHouse: + - PRIMARY KEY defines uniqueness and deduplication + - ORDER BY defines physical data layout and query optimization + + Sometimes these need different column orderings for optimal performance. + +**Important Constraint:** + +⚠️ **PRIMARY KEY must be a prefix of ORDER BY in ClickHouse.** This means ORDER BY must start with all PRIMARY KEY columns in the same order. + +Valid: +- PRIMARY KEY `(userId)` with ORDER BY `(userId, timestamp)` ✅ +- PRIMARY KEY `(userId, cityHash64(eventId))` with ORDER BY `(userId, cityHash64(eventId), timestamp)` ✅ + +Invalid: +- PRIMARY KEY `(userId, eventId)` with ORDER BY `(userId, timestamp)` ❌ (missing eventId) +- PRIMARY KEY `(userId, eventId)` with ORDER BY `(eventId, userId)` ❌ (wrong order) + ### Using Both Primary Key and Order By Fields @@ -874,6 +999,49 @@ public_s3 = OlapTable[DataRecord]("public_s3", OlapConfig( Both engines support the same credential management and format options. +#### IcebergS3 +The `IcebergS3` engine provides read-only access to Iceberg tables stored in S3: + + + +```ts filename="IcebergTable.ts" copy +import { OlapTable, ClickHouseEngines, mooseRuntimeEnv } from '@514labs/moose-lib'; + +// Iceberg table with AWS credentials (recommended with mooseRuntimeEnv) +export const icebergEvents = new OlapTable("iceberg_events", { + engine: ClickHouseEngines.IcebergS3, + path: "s3://my-bucket/warehouse/db/table/", + format: "Parquet", // or "ORC" + awsAccessKeyId: mooseRuntimeEnv.get("AWS_ACCESS_KEY_ID"), + awsSecretAccessKey: mooseRuntimeEnv.get("AWS_SECRET_ACCESS_KEY"), +}); +``` + + +```py filename="IcebergTable.py" copy +from moose_lib import OlapTable, OlapConfig, moose_runtime_env +from moose_lib.blocks import IcebergS3Engine + +# Iceberg table with AWS credentials (recommended with moose_runtime_env) +iceberg_events = OlapTable[Event]("iceberg_events", OlapConfig( + engine=IcebergS3Engine( + path="s3://my-bucket/warehouse/db/table/", + format="Parquet", # or "ORC" + aws_access_key_id=moose_runtime_env.get("AWS_ACCESS_KEY_ID"), + aws_secret_access_key=moose_runtime_env.get("AWS_SECRET_ACCESS_KEY"), + ) +)) +``` + + + + +- IcebergS3 tables are **read-only** and provide access to the latest state of your Iceberg table +- `orderByFields`, `orderByExpression`, `partitionBy`, and `sampleByExpression` are not supported +- The table automatically reflects the current state of the Iceberg table in S3 +- Supported formats: **Parquet** and **ORC** only + + #### In-Memory Buffer (`Buffer`) The `Buffer` engine provides an in-memory buffer that flushes data to a destination table based on time, row count, or size thresholds: @@ -1070,18 +1238,111 @@ cloud_replicated = OlapTable[Record]("cloud_records", OlapConfig( - -The `keeper_path` and `replica_name` parameters are **optional** for replicated engines: +##### Configuring Replication -- **Omit both parameters** (recommended): Moose uses smart defaults that work in both ClickHouse Cloud and self-managed environments. 
The default path pattern `/clickhouse/tables/{uuid}/{shard}` with replica `{replica}` works automatically with Atomic databases (default in modern ClickHouse). - -- **Provide custom paths**: You can still specify both parameters explicitly if you need custom replication paths for your self-managed cluster. +Replicated engines support three configuration approaches. Choose the one that fits your deployment: -**Note**: Both parameters must be provided together, or both omitted. The `{uuid}`, `{shard}`, and `{replica}` macros are automatically substituted by ClickHouse at runtime. +###### Default -For more details, see the [ClickHouse documentation on data replication](https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/replication). +Omit all replication parameters. Moose uses smart defaults that work in both ClickHouse Cloud and self-managed environments: + + + +```ts filename="DefaultReplication.ts" copy +const table = new OlapTable("my_table", { + engine: ClickHouseEngines.ReplicatedMergeTree, + orderByFields: ["id"] + // No keeper_path, replica_name, or cluster needed +}); +``` + + +```py filename="DefaultReplication.py" copy +table = OlapTable[Record]("my_table", OlapConfig( + engine=ReplicatedMergeTreeEngine(), # No parameters + order_by_fields=["id"] +)) +``` + + + +Moose auto-injects: `/clickhouse/tables/{database}/{shard}/{table_name}` and `{replica}` in local development. ClickHouse Cloud uses its own patterns automatically. + +###### Cluster + +For multi-node deployments, specify a cluster name to use `ON CLUSTER` DDL operations: + + + +```ts filename="ClusterReplication.ts" copy +const table = new OlapTable("my_table", { + engine: ClickHouseEngines.ReplicatedMergeTree, + orderByFields: ["id"], + cluster: "default" // References cluster from moose.config.toml +}); +``` + + +```py filename="ClusterReplication.py" copy +table = OlapTable[Record]("my_table", OlapConfig( + engine=ReplicatedMergeTreeEngine(), + order_by_fields=["id"], + cluster="default" # References cluster from moose.config.toml +)) +``` + + + +**Configuration in `moose.config.toml`:** +```toml +[[clickhouse_config.clusters]] +name = "default" +``` + +**Use when:** +- Running multi-node self-managed ClickHouse with cluster configuration +- Need `ON CLUSTER` DDL for distributed operations + +###### Replication Paths + +For custom replication topology, specify both `keeper_path` and `replica_name`: + + + +```ts filename="ExplicitReplication.ts" copy +const table = new OlapTable("my_table", { + engine: ClickHouseEngines.ReplicatedMergeTree, + keeperPath: "/clickhouse/tables/{database}/{shard}/my_table", + replicaName: "{replica}", + orderByFields: ["id"] +}); +``` + + +```py filename="ExplicitReplication.py" copy +table = OlapTable[Record]("my_table", OlapConfig( + engine=ReplicatedMergeTreeEngine( + keeper_path="/clickhouse/tables/{database}/{shard}/my_table", + replica_name="{replica}" + ), + order_by_fields=["id"] +)) +``` + + + +**Use when:** +- Need custom replication paths for advanced configurations +- Both parameters must be provided together + + +**Cannot mix approaches:** Specifying both `cluster` and explicit `keeper_path`/`replica_name` will cause an error. Choose one approach. + +**Cluster is a deployment directive:** Changing `cluster` won't recreate your table—it only affects future DDL operations. +For more details, see the [ClickHouse documentation on data replication](https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/replication). 
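
To make the "cannot mix approaches" rule concrete, here is a minimal TypeScript sketch using the same option names as the examples in this section (`TableRow` is a placeholder schema): one valid cluster-based table, and the disallowed combination of `cluster` with explicit replication paths.

```ts
import { OlapTable, ClickHouseEngines } from "@514labs/moose-lib";

interface TableRow {
  id: string;
}

// Valid: pick exactly one approach. Here, the cluster directive
// references a cluster defined in moose.config.toml.
const clustered = new OlapTable<TableRow>("my_table", {
  engine: ClickHouseEngines.ReplicatedMergeTree,
  orderByFields: ["id"],
  cluster: "default",
});

// Invalid: combining `cluster` with explicit keeperPath/replicaName
// mixes two approaches and is rejected by Moose.
// const mixed = new OlapTable<TableRow>("my_table", {
//   engine: ClickHouseEngines.ReplicatedMergeTree,
//   orderByFields: ["id"],
//   cluster: "default",
//   keeperPath: "/clickhouse/tables/{database}/{shard}/my_table",
//   replicaName: "{replica}",
// });
```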
+ ### Irregular column names and Python Aliases diff --git a/apps/framework-docs-v2/content/templates/index.mdx b/apps/framework-docs-v2/content/templates/index.mdx index 29cdba52cf..149114ccc2 100644 --- a/apps/framework-docs-v2/content/templates/index.mdx +++ b/apps/framework-docs-v2/content/templates/index.mdx @@ -5,69 +5,13 @@ order: 2 category: getting-started --- -import { CTACards, CTACard } from "@/components/mdx"; -import { Badge } from "@/components/ui/badge"; -import Link from "next/link"; -import { TemplatesGridServer } from "@/components/mdx"; +import { TemplatesGridServer, CommandSnippet } from "@/components/mdx"; # Templates & Apps Moose provides two ways to get started: **templates** and **demo apps**. Templates are simple skeleton applications that you can initialize with `moose init`, while demo apps are more advanced examples available on GitHub that showcase real-world use cases and integrations. -**Initialize a template:** -```bash filename="Terminal" copy -moose init PROJECT_NAME TEMPLATE_NAME -``` - -**List available templates:** -```bash filename="Terminal" copy -moose template list -``` - -## Popular Apps - - - - - - - - - - ---- + ## Browse Apps and Templates diff --git a/apps/framework-docs-v2/next.config.js b/apps/framework-docs-v2/next.config.js index 0214205314..ddd0c45aa7 100644 --- a/apps/framework-docs-v2/next.config.js +++ b/apps/framework-docs-v2/next.config.js @@ -10,6 +10,13 @@ const createWithVercelToolbar = require("@vercel/toolbar/plugins/next"); /** @type {import('next').NextConfig} */ const nextConfig = { + // Based on the provided documentation, cacheComponents is a root-level option + cacheComponents: true, + + experimental: { + // Removing dynamicIO as it caused an error and might be implied or renamed + }, + reactStrictMode: true, pageExtensions: ["js", "jsx", "ts", "tsx", "md", "mdx"], images: { diff --git a/apps/framework-docs-v2/package.json b/apps/framework-docs-v2/package.json index ebb96fdf62..1ec487751a 100644 --- a/apps/framework-docs-v2/package.json +++ b/apps/framework-docs-v2/package.json @@ -21,13 +21,15 @@ "@next/mdx": "^16.0.1", "@radix-ui/react-accordion": "^1.2.11", "@radix-ui/react-avatar": "^1.0.4", + "@radix-ui/react-checkbox": "^1.3.3", "@radix-ui/react-collapsible": "^1.1.11", "@radix-ui/react-dialog": "^1.1.15", "@radix-ui/react-dropdown-menu": "^2.1.15", "@radix-ui/react-label": "^2.1.7", "@radix-ui/react-navigation-menu": "^1.2.13", + "@radix-ui/react-popover": "^1.1.15", "@radix-ui/react-scroll-area": "^1.2.2", - "@radix-ui/react-select": "^2.0.0", + "@radix-ui/react-select": "^2.2.6", "@radix-ui/react-separator": "^1.1.7", "@radix-ui/react-slot": "^1.2.3", "@radix-ui/react-tabs": "^1.1.12", @@ -37,6 +39,8 @@ "@radix-ui/react-use-controllable-state": "^1.2.2", "@shikijs/transformers": "^3.14.0", "@tabler/icons-react": "^3.35.0", + "@tanstack/react-form": "^1.25.0", + "@tanstack/zod-adapter": "^1.136.18", "@types/mdx": "^2.0.13", "@vercel/toolbar": "^0.1.41", "class-variance-authority": "^0.7.1", @@ -62,7 +66,9 @@ "shiki": "^3.14.0", "sonner": "^2.0.7", "tailwind-merge": "^2.6.0", - "tailwindcss-animate": "^1.0.7" + "tailwindcss-animate": "^1.0.7", + "unist-util-visit": "^5.0.0", + "zod": "^3.25.76" }, "devDependencies": { "@repo/eslint-config-custom": "workspace:*", diff --git a/apps/framework-docs-v2/public/robots.txt b/apps/framework-docs-v2/public/robots.txt index 2fe0e16a59..a3a8c086e2 100644 --- a/apps/framework-docs-v2/public/robots.txt +++ b/apps/framework-docs-v2/public/robots.txt @@ -3,7 +3,7 @@ User-agent: * 
Allow: / # Host -Host: https://docs.moosestack.com +Host: https://docs.fiveonefour.com # Sitemaps -Sitemap: https://docs.moosestack.com/sitemap.xml +Sitemap: https://docs.fiveonefour.com/sitemap.xml diff --git a/apps/framework-docs-v2/public/sitemap-0.xml b/apps/framework-docs-v2/public/sitemap-0.xml index 102b6d1480..a8d33c86f3 100644 --- a/apps/framework-docs-v2/public/sitemap-0.xml +++ b/apps/framework-docs-v2/public/sitemap-0.xml @@ -1,114 +1,163 @@ -https://docs.moosestack.com/ai2025-11-10T02:40:55.309Zdaily0.7 -https://docs.moosestack.com/ai/data-collection-policy2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/ai/demos/context2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/ai/demos/dlqs2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/ai/demos/egress2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/ai/demos/index2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/ai/demos/ingest2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/ai/demos/model-data2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/ai/demos/mvs2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/ai/getting-started/claude2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/ai/getting-started/cursor2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/ai/getting-started/index2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/ai/getting-started/other-clients2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/ai/getting-started/vs-code2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/ai/getting-started/windsurf2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/ai/guides/clickhouse-chat2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/ai/guides/clickhouse-proj2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/ai/guides/from-template2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/ai/guides/index2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/ai/index2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/ai/overview2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/ai/reference/cli-reference2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/ai/reference/index2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/ai/reference/mcp-json-reference2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/ai/reference/tool-reference2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/hosting2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/hosting/deployment2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/hosting/getting-started2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/hosting/index2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/hosting/overview2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/moosestack2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/moosestack/apis/admin-api2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/moosestack/apis/analytics-api2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/moosestack/apis/auth2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/moosestack/apis/index2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/moosestack/apis/ingest-api2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/moosestack/apis/openapi-sdk2025-11-10T02:40:55.310Zdaily0.7 
-https://docs.moosestack.com/moosestack/apis/trigger-api2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/moosestack/app-api-frameworks/express2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/moosestack/app-api-frameworks/fastapi2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/moosestack/app-api-frameworks/fastify2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/moosestack/app-api-frameworks/index2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/moosestack/app-api-frameworks/koa2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/moosestack/app-api-frameworks/nextjs2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/moosestack/app-api-frameworks/raw-nodejs2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/moosestack/changelog2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/moosestack/configuration2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/moosestack/contribution/documentation2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/moosestack/contribution/framework2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/moosestack/data-modeling2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/moosestack/data-sources2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/moosestack/deploying/configuring-moose-for-cloud2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/moosestack/deploying/deploying-on-an-offline-server2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/moosestack/deploying/deploying-on-ecs2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/moosestack/deploying/deploying-on-kubernetes2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/moosestack/deploying/deploying-with-docker-compose2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/moosestack/deploying/index2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/moosestack/deploying/monitoring2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/moosestack/deploying/packaging-moose-for-deployment2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/moosestack/deploying/preparing-clickhouse-redpanda2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/moosestack/getting-started/from-clickhouse2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/moosestack/getting-started/index2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/moosestack/getting-started/quickstart2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/moosestack/help/index2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/moosestack/help/minimum-requirements2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/moosestack/help/troubleshooting2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/moosestack/in-your-stack2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/moosestack/index2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/moosestack/local-dev-environment2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/moosestack/metrics2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/moosestack/migrate/index2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/moosestack/migrate/lifecycle2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/moosestack/migrate/migration-types2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/moosestack/moose-cli2025-11-10T02:40:55.310Zdaily0.7 
-https://docs.moosestack.com/moosestack/moosedev-mcp2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/moosestack/olap/apply-migrations2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/moosestack/olap/db-pull2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/moosestack/olap/external-tables2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/moosestack/olap/index2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/moosestack/olap/indexes2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/moosestack/olap/insert-data2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/moosestack/olap/model-materialized-view2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/moosestack/olap/model-table2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/moosestack/olap/model-view2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/moosestack/olap/planned-migrations2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/moosestack/olap/read-data2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/moosestack/olap/schema-change2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/moosestack/olap/schema-optimization2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/moosestack/olap/schema-versioning2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/moosestack/olap/supported-types2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/moosestack/olap/ttl2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/moosestack/overview2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/moosestack/quickstart2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/moosestack/reference/index2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/moosestack/streaming/connect-cdc2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/moosestack/streaming/consumer-functions2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/moosestack/streaming/create-stream2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/moosestack/streaming/dead-letter-queues2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/moosestack/streaming/from-your-code2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/moosestack/streaming/index2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/moosestack/streaming/schema-registry2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/moosestack/streaming/sync-to-table2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/moosestack/streaming/transform-functions2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/moosestack/templates-examples2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/moosestack/workflows/cancel-workflow2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/moosestack/workflows/define-workflow2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/moosestack/workflows/index2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/moosestack/workflows/retries-and-timeouts2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/moosestack/workflows/schedule-workflow2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com/moosestack/workflows/trigger-workflow2025-11-10T02:40:55.310Zdaily0.7 -https://docs.moosestack.com2025-11-10T02:40:55.310Zdaily0.7 +https://docs.fiveonefour.com/ai2025-12-01T18:10:45.728Zdaily0.7 +https://docs.fiveonefour.com/ai/data-collection-policy2025-12-01T18:10:45.729Zdaily0.7 
+https://docs.fiveonefour.com/ai/demos/context2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/ai/demos/dlqs2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/ai/demos/egress2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/ai/demos/index2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/ai/demos/ingest2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/ai/demos/model-data2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/ai/demos/mvs2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/ai/getting-started/claude2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/ai/getting-started/cursor2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/ai/getting-started/index2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/ai/getting-started/other-clients2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/ai/getting-started/vs-code2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/ai/getting-started/windsurf2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/ai/guides/clickhouse-chat2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/ai/guides/clickhouse-proj2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/ai/guides/from-template2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/ai/guides/index2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/ai/index2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/ai/overview2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/ai/reference/cli-reference2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/ai/reference/index2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/ai/reference/mcp-json-reference2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/ai/reference/tool-reference2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/hosting2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/hosting/deployment2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/hosting/getting-started2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/hosting/index2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/hosting/overview2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/apis/admin-api2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/apis/analytics-api2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/apis/auth2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/apis/index2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/apis/ingest-api2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/apis/openapi-sdk2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/apis/trigger-api2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/app-api-frameworks/express2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/app-api-frameworks/fastapi2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/app-api-frameworks/fastify2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/app-api-frameworks/index2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/app-api-frameworks/koa2025-12-01T18:10:45.729Zdaily0.7 
+https://docs.fiveonefour.com/moosestack/app-api-frameworks/nextjs2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/app-api-frameworks/raw-nodejs2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/changelog2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/configuration2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/contribution/documentation2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/contribution/framework2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/data-modeling2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/data-sources2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/deploying/configuring-moose-for-cloud2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/deploying/deploying-on-an-offline-server2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/deploying/deploying-on-ecs2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/deploying/deploying-on-kubernetes2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/deploying/deploying-with-docker-compose2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/deploying/index2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/deploying/monitoring2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/deploying/packaging-moose-for-deployment2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/deploying/preparing-clickhouse-redpanda2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/getting-started/from-clickhouse2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/getting-started/index2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/getting-started/quickstart2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/help/index2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/help/minimum-requirements2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/help/troubleshooting2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/in-your-stack2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/index2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/local-dev-environment2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/metrics2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/migrate/apply-planned-migrations-cli2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/migrate/apply-planned-migrations-service2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/migrate/auto-inferred2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/migrate/failed-migrations2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/migrate/index2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/migrate/lifecycle2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/migrate/lifecycle-deletion-protected2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/migrate/lifecycle-externally-managed2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/migrate/lifecycle-fully-managed2025-12-01T18:10:45.729Zdaily0.7 
+https://docs.fiveonefour.com/moosestack/migrate/migration-types2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/migrate/modes2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/migrate/plan-format2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/migrate/planned-migrations2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/moose-cli2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/moosedev-mcp2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/olap/apply-migrations2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/olap/db-pull2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/olap/external-tables2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/olap/index2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/olap/indexes2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/olap/insert-data2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/olap/model-materialized-view2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/olap/model-table2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/olap/model-view2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/olap/planned-migrations2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/olap/read-data2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/olap/schema-optimization2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/olap/schema-versioning2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/olap/supported-types2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/olap/ttl2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/overview2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/quickstart2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/reference/index2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/streaming/connect-cdc2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/streaming/consumer-functions2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/streaming/create-stream2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/streaming/dead-letter-queues2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/streaming/from-your-code2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/streaming/index2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/streaming/schema-registry2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/streaming/sync-to-table2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/streaming/transform-functions2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/workflows/cancel-workflow2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/workflows/define-workflow2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/workflows/index2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/workflows/retries-and-timeouts2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/workflows/schedule-workflow2025-12-01T18:10:45.729Zdaily0.7 
+https://docs.fiveonefour.com/moosestack/workflows/trigger-workflow2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/guides/applications/automated-reports2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/guides/applications/automated-reports/guide-overview2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/guides/applications/going-to-production2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/guides/applications/going-to-production/guide-overview2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/guides/applications/in-app-chat-analytics2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/guides/applications/in-app-chat-analytics/guide-overview2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/guides/applications/performant-dashboards2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/guides/applications/performant-dashboards/existing-oltp-db2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/guides/applications/performant-dashboards/existing-oltp-db/1-setup-connection2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/guides/applications/performant-dashboards/guide-overview2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/guides/data-management/change-data-capture2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/guides/data-management/change-data-capture/debezium-dev-to-prod-outline2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/guides/data-management/change-data-capture/guide-overview2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/guides/data-management/impact-analysis2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/guides/data-management/impact-analysis/guide-overview2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/guides/data-management/migrations2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/guides/data-management/migrations/guide-overview2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/guides/data-warehousing/connectors2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/guides/data-warehousing/connectors/guide-overview2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/guides/data-warehousing/customer-data-platform2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/guides/data-warehousing/customer-data-platform/guide-overview2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/guides/data-warehousing/operational-analytics2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/guides/data-warehousing/operational-analytics/guide-overview2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/guides/data-warehousing/pipelines2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/guides/data-warehousing/pipelines/guide-overview2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/guides/data-warehousing/startup-metrics2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/guides/data-warehousing/startup-metrics/guide-overview2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/guides/methodology/data-as-code2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/guides/methodology/data-as-code/guide-overview2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/guides/methodology/dora-for-data2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/guides/methodology/dora-for-data/guide-overview2025-12-01T18:10:45.729Zdaily0.7 
+https://docs.fiveonefour.com/guides/strategy/ai-enablement2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/guides/strategy/ai-enablement/guide-overview2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/guides/strategy/data-foundation2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/guides/strategy/data-foundation/guide-overview2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/guides/strategy/olap-evaluation2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/guides/strategy/olap-evaluation/guide-overview2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/guides/strategy/platform-engineering2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/guides/strategy/platform-engineering/guide-overview2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/guides2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/templates2025-12-01T18:10:45.729Zdaily0.7 \ No newline at end of file diff --git a/apps/framework-docs-v2/public/sitemap.xml b/apps/framework-docs-v2/public/sitemap.xml index b7449c4902..c30ae08b51 100644 --- a/apps/framework-docs-v2/public/sitemap.xml +++ b/apps/framework-docs-v2/public/sitemap.xml @@ -1,4 +1,4 @@ -https://docs.moosestack.com/sitemap-0.xml +https://docs.fiveonefour.com/sitemap-0.xml \ No newline at end of file diff --git a/apps/framework-docs-v2/src/app/[...slug]/page.tsx b/apps/framework-docs-v2/src/app/(docs)/[...slug]/page.tsx similarity index 81% rename from apps/framework-docs-v2/src/app/[...slug]/page.tsx rename to apps/framework-docs-v2/src/app/(docs)/[...slug]/page.tsx index 35a58dc1d6..1edb56e23b 100644 --- a/apps/framework-docs-v2/src/app/[...slug]/page.tsx +++ b/apps/framework-docs-v2/src/app/(docs)/[...slug]/page.tsx @@ -6,7 +6,7 @@ import { MDXRenderer } from "@/components/mdx-renderer"; import { DocBreadcrumbs } from "@/components/navigation/doc-breadcrumbs"; import { buildDocBreadcrumbs } from "@/lib/breadcrumbs"; -export const dynamic = "force-dynamic"; +// export const dynamic = "force-dynamic"; interface PageProps { params: Promise<{ @@ -18,18 +18,22 @@ interface PageProps { export async function generateStaticParams() { const slugs = getAllSlugs(); + // Filter out templates and guides slugs (they have their own explicit pages) + const filteredSlugs = slugs.filter( + (slug) => !slug.startsWith("templates/") && !slug.startsWith("guides/"), + ); + // Generate params for each slug - const allParams: { slug: string[] }[] = slugs.map((slug) => ({ + const allParams: { slug: string[] }[] = filteredSlugs.map((slug) => ({ slug: slug.split("/"), })); - // Also add section index routes (moosestack, ai, hosting, templates) - // These map to section/index.mdx files + // Also add section index routes (moosestack, ai, hosting) + // Note: templates and guides are now explicit pages, so they're excluded here allParams.push( { slug: ["moosestack"] }, { slug: ["ai"] }, { slug: ["hosting"] }, - { slug: ["templates"] }, ); return allParams; @@ -81,6 +85,11 @@ export default async function DocPage({ params }: PageProps) { const slug = slugArray.join("/"); + // Templates and guides are now explicit pages, so they should not be handled by this catch-all route + if (slug.startsWith("templates/") || slug.startsWith("guides/")) { + notFound(); + } + let content; try { content = await parseMarkdownContent(slug); diff --git a/apps/framework-docs-v2/src/app/(docs)/guides/[...slug]/page.tsx 
b/apps/framework-docs-v2/src/app/(docs)/guides/[...slug]/page.tsx new file mode 100644 index 0000000000..3e112e50cc --- /dev/null +++ b/apps/framework-docs-v2/src/app/(docs)/guides/[...slug]/page.tsx @@ -0,0 +1,246 @@ +import { notFound } from "next/navigation"; +import type { Metadata } from "next"; +import { + getAllSlugs, + parseMarkdownContent, + discoverStepFiles, +} from "@/lib/content"; +import { TOCNav } from "@/components/navigation/toc-nav"; +import { MDXRenderer } from "@/components/mdx-renderer"; +import { DocBreadcrumbs } from "@/components/navigation/doc-breadcrumbs"; +import { buildDocBreadcrumbs } from "@/lib/breadcrumbs"; +import { GuideStepsWrapper } from "@/components/guides/guide-steps-wrapper"; +import { DynamicGuideBuilder } from "@/components/guides/dynamic-guide-builder"; +import { parseGuideManifest, getCachedGuideSteps } from "@/lib/guide-content"; + +// export const dynamic = "force-dynamic"; + +interface PageProps { + params: Promise<{ + slug: string[]; + }>; + searchParams: Promise<{ [key: string]: string | string[] | undefined }>; +} + +export async function generateStaticParams() { + // Get all slugs and filter for guides + const slugs = getAllSlugs(); + + // Filter for guides slugs and generate params + const guideSlugs = slugs.filter((slug) => slug.startsWith("guides/")); + + // Remove the "guides/" prefix and split into array + const allParams: { slug: string[] }[] = guideSlugs + .map((slug) => slug.replace(/^guides\//, "")) + .filter((slug) => slug !== "index") // Exclude the index page + .map((slug) => ({ + slug: slug.split("/"), + })); + + return allParams; +} + +export async function generateMetadata({ + params, +}: PageProps): Promise { + const resolvedParams = await params; + const slugArray = resolvedParams.slug; + + // Handle empty slug array (shouldn't happen with [...slug] but be safe) + if (!slugArray || slugArray.length === 0) { + return { + title: "Guides | MooseStack Documentation", + description: + "Comprehensive guides for building applications, managing data, and implementing data warehousing strategies", + }; + } + + const slug = `guides/${slugArray.join("/")}`; + + try { + const content = await parseMarkdownContent(slug); + return { + title: + content.frontMatter.title ? + `${content.frontMatter.title} | MooseStack Documentation` + : "Guides | MooseStack Documentation", + description: + content.frontMatter.description || + "Comprehensive guides for building applications, managing data, and implementing data warehousing strategies", + }; + } catch (error) { + return { + title: "Guides | MooseStack Documentation", + description: + "Comprehensive guides for building applications, managing data, and implementing data warehousing strategies", + }; + } +} + +export default async function GuidePage({ params, searchParams }: PageProps) { + const resolvedParams = await params; + const resolvedSearchParams = await searchParams; + const slugArray = resolvedParams.slug; + + // Handle empty slug array (shouldn't happen with [...slug] but be safe) + if (!slugArray || slugArray.length === 0) { + notFound(); + } + + const slug = `guides/${slugArray.join("/")}`; + + let content; + try { + content = await parseMarkdownContent(slug); + } catch (error) { + notFound(); + } + + const breadcrumbs = buildDocBreadcrumbs( + slug, + typeof content.frontMatter.title === "string" ? 
+ content.frontMatter.title + : undefined, + ); + + // Check if this is a dynamic guide by checking for guide.toml + const guideManifest = await parseGuideManifest(slug); + + if (guideManifest) { + // DYNAMIC GUIDE LOGIC + + // Flatten search params to Record for our cache function + const queryParams: Record = {}; + Object.entries(resolvedSearchParams).forEach(([key, value]) => { + if (typeof value === "string") { + queryParams[key] = value; + } else if (Array.isArray(value) && value.length > 0 && value[0]) { + // Take first value if array + queryParams[key] = value[0]; + } + }); + + // Fetch steps here (cached function) + const steps = await getCachedGuideSteps(slug, queryParams); + + const allHeadings = [...content.headings]; + if (steps.length > 0) { + // Add steps as headings in TOC, avoiding duplicates + const existingIds = new Set(allHeadings.map((h) => h.id)); + steps.forEach((step) => { + const stepId = `step-${step.stepNumber}`; + // Only add if ID doesn't already exist + if (!existingIds.has(stepId)) { + allHeadings.push({ + level: 2, + text: `${step.stepNumber}. ${step.title}`, + id: stepId, + }); + existingIds.add(stepId); + } + }); + } + + return ( + <> +
+ +
+ {content.isMDX ? + + :
} +
+ + + + {steps.length > 0 ? + step)} + stepsWithContent={steps} + currentSlug={slug} + /> + :
+ No steps found for this configuration. Please try different + options. +
+ } +
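+              {/* Steps come from getCachedGuideSteps(slug, queryParams) above, so changing
+                  an option in the guide form pushes new search params and re-runs this
+                  server component instead of filtering steps on the client. */}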
+ + + ); + } + + // STATIC GUIDE LOGIC (Fallback) + + // Discover step files for this starting point page + const steps = discoverStepFiles(slug); + + // Load step content server-side and pre-render MDX + const stepsWithContent = await Promise.all( + steps.map(async (step) => { + try { + const stepContent = await parseMarkdownContent(step.slug); + return { + ...step, + content: stepContent.content, + isMDX: stepContent.isMDX ?? false, + }; + } catch (error) { + console.error(`Failed to load step ${step.slug}:`, error); + return { + ...step, + content: null, + isMDX: false, + }; + } + }), + ); + + // Combine page headings with step headings for TOC + const allHeadings = [...content.headings]; + if (steps.length > 0) { + // Add steps as headings in TOC, avoiding duplicates + const existingIds = new Set(allHeadings.map((h) => h.id)); + steps.forEach((step) => { + const stepId = `step-${step.stepNumber}`; + // Only add if ID doesn't already exist + if (!existingIds.has(stepId)) { + allHeadings.push({ + level: 2, + text: `${step.stepNumber}. ${step.title}`, + id: stepId, + }); + existingIds.add(stepId); + } + }); + } + + return ( + <> +
+ +
+ {content.isMDX ? + + :
} +
+ {steps.length > 0 && ( + step, + )} + stepsWithContent={stepsWithContent} + currentSlug={slug} + /> + )} +
+ + + ); +} diff --git a/apps/framework-docs-v2/src/app/(docs)/guides/page.tsx b/apps/framework-docs-v2/src/app/(docs)/guides/page.tsx new file mode 100644 index 0000000000..d7261b2c69 --- /dev/null +++ b/apps/framework-docs-v2/src/app/(docs)/guides/page.tsx @@ -0,0 +1,63 @@ +import { notFound } from "next/navigation"; +import type { Metadata } from "next"; +import { parseMarkdownContent } from "@/lib/content"; +import { TOCNav } from "@/components/navigation/toc-nav"; +import { MDXRenderer } from "@/components/mdx-renderer"; +import { DocBreadcrumbs } from "@/components/navigation/doc-breadcrumbs"; +import { buildDocBreadcrumbs } from "@/lib/breadcrumbs"; + +// export const dynamic = "force-dynamic"; + +export async function generateMetadata(): Promise { + try { + const content = await parseMarkdownContent("guides/index"); + return { + title: + content.frontMatter.title ? + `${content.frontMatter.title} | MooseStack Documentation` + : "Guides | MooseStack Documentation", + description: + content.frontMatter.description || + "Comprehensive guides for building applications, managing data, and implementing data warehousing strategies", + }; + } catch (error) { + return { + title: "Guides | MooseStack Documentation", + description: + "Comprehensive guides for building applications, managing data, and implementing data warehousing strategies", + }; + } +} + +export default async function GuidesPage() { + let content; + try { + content = await parseMarkdownContent("guides/index"); + } catch (error) { + notFound(); + } + + const breadcrumbs = buildDocBreadcrumbs( + "guides/index", + typeof content.frontMatter.title === "string" ? + content.frontMatter.title + : undefined, + ); + + return ( + <> +
+ +
+ {content.isMDX ? + + :
} +
+
+ + + ); +} diff --git a/apps/framework-docs-v2/src/app/[...slug]/layout.tsx b/apps/framework-docs-v2/src/app/(docs)/layout.tsx similarity index 74% rename from apps/framework-docs-v2/src/app/[...slug]/layout.tsx rename to apps/framework-docs-v2/src/app/(docs)/layout.tsx index 043aa12216..fb0caf00fa 100644 --- a/apps/framework-docs-v2/src/app/[...slug]/layout.tsx +++ b/apps/framework-docs-v2/src/app/(docs)/layout.tsx @@ -1,5 +1,6 @@ import type { ReactNode } from "react"; import { Suspense } from "react"; +import { headers } from "next/headers"; import { SideNav } from "@/components/navigation/side-nav"; import { AnalyticsProvider } from "@/components/analytics-provider"; import { SidebarInset } from "@/components/ui/sidebar"; @@ -7,20 +8,22 @@ import { showDataSourcesPage } from "@/flags"; interface DocLayoutProps { children: ReactNode; - params: Promise<{ - slug?: string[]; - }>; } async function FilteredSideNav() { // Evaluate feature flag + // Note: Accessing headers() in the parent component marks this as dynamic, + // which allows Date.now() usage in the flags SDK const showDataSources = await showDataSourcesPage().catch(() => false); // Pass flag to SideNav, which will filter navigation items after language filtering return ; } -export default async function DocLayout({ children, params }: DocLayoutProps) { +export default async function DocLayout({ children }: DocLayoutProps) { + // Access headers() to mark this layout as dynamic, which allows Date.now() usage + // in the flags SDK without triggering Next.js static generation errors + await headers(); return (
diff --git a/apps/framework-docs-v2/src/app/api/templates/route.ts b/apps/framework-docs-v2/src/app/api/templates/route.ts index 4b4b63504e..9a2d2fe393 100644 --- a/apps/framework-docs-v2/src/app/api/templates/route.ts +++ b/apps/framework-docs-v2/src/app/api/templates/route.ts @@ -1,7 +1,7 @@ import { NextResponse } from "next/server"; import { getAllItems } from "@/lib/templates"; -export const dynamic = "force-static"; +// export const dynamic = "force-static"; export async function GET() { try { diff --git a/apps/framework-docs-v2/src/app/layout.tsx b/apps/framework-docs-v2/src/app/layout.tsx index 967ccb9aa6..036d2e3fdd 100644 --- a/apps/framework-docs-v2/src/app/layout.tsx +++ b/apps/framework-docs-v2/src/app/layout.tsx @@ -1,16 +1,13 @@ import type { Metadata } from "next"; import type { ReactNode } from "react"; import { Suspense } from "react"; -import { cookies } from "next/headers"; import "@/styles/globals.css"; import { ThemeProvider } from "@/components/theme-provider"; import { LanguageProviderWrapper } from "@/components/language-provider-wrapper"; -import { TopNav } from "@/components/navigation/top-nav"; +import { TopNavWithFlags } from "@/components/navigation/top-nav-with-flags"; import { SidebarProvider } from "@/components/ui/sidebar"; import { Toaster } from "@/components/ui/sonner"; import { ScrollRestoration } from "@/components/scroll-restoration"; -import { getGitHubStars } from "@/lib/github-stars"; -import { showHostingSection, showGuidesSection, showAiSection } from "@/flags"; import { VercelToolbar } from "@vercel/toolbar/next"; export const metadata: Metadata = { @@ -19,22 +16,13 @@ export const metadata: Metadata = { }; // Force dynamic to enable cookie-based flag overrides -export const dynamic = "force-dynamic"; +// export const dynamic = "force-dynamic"; export default async function RootLayout({ children, }: Readonly<{ children: ReactNode; }>) { - const stars = await getGitHubStars(); - - // Evaluate feature flags (reads cookies automatically for overrides) - const [showHosting, showGuides, showAi] = await Promise.all([ - showHostingSection().catch(() => false), - showGuidesSection().catch(() => false), - showAiSection().catch(() => true), - ]); - const shouldInjectToolbar = process.env.NODE_ENV === "development"; return ( @@ -51,14 +39,7 @@ export default async function RootLayout({
- }> - - + {children}
diff --git a/apps/framework-docs-v2/src/app/page.tsx b/apps/framework-docs-v2/src/app/page.tsx index 6d280ef3c0..d65e2187e3 100644 --- a/apps/framework-docs-v2/src/app/page.tsx +++ b/apps/framework-docs-v2/src/app/page.tsx @@ -11,7 +11,7 @@ import { IconDatabase, IconCloud, IconSparkles } from "@tabler/icons-react"; import { showHostingSection, showAiSection } from "@/flags"; import { cn } from "@/lib/utils"; -export const dynamic = "force-dynamic"; +// export const dynamic = "force-dynamic"; export default async function HomePage() { // Evaluate feature flags diff --git a/apps/framework-docs-v2/src/app/templates/layout.tsx b/apps/framework-docs-v2/src/app/templates/layout.tsx new file mode 100644 index 0000000000..25d5d3afb1 --- /dev/null +++ b/apps/framework-docs-v2/src/app/templates/layout.tsx @@ -0,0 +1,31 @@ +import type { ReactNode } from "react"; +import { Suspense } from "react"; +import { TemplatesSideNav } from "./templates-side-nav"; +import { AnalyticsProvider } from "@/components/analytics-provider"; +import { SidebarInset } from "@/components/ui/sidebar"; + +interface TemplatesLayoutProps { + children: ReactNode; +} + +export default async function TemplatesLayout({ + children, +}: TemplatesLayoutProps) { + return ( + +
+ }> + + + +
+ {/* Reserve space for the right TOC on xl+ screens */} +
+ {children} +
+
+
+
+
+ ); +} diff --git a/apps/framework-docs-v2/src/app/templates/page.tsx b/apps/framework-docs-v2/src/app/templates/page.tsx new file mode 100644 index 0000000000..0bae069442 --- /dev/null +++ b/apps/framework-docs-v2/src/app/templates/page.tsx @@ -0,0 +1,62 @@ +import { notFound } from "next/navigation"; +import type { Metadata } from "next"; +import { parseMarkdownContent } from "@/lib/content"; +import { TOCNav } from "@/components/navigation/toc-nav"; +import { MDXRenderer } from "@/components/mdx-renderer"; +import { DocBreadcrumbs } from "@/components/navigation/doc-breadcrumbs"; +import { buildDocBreadcrumbs } from "@/lib/breadcrumbs"; + +// export const dynamic = "force-dynamic"; + +export async function generateMetadata(): Promise { + try { + const content = await parseMarkdownContent("templates/index"); + return { + title: + content.frontMatter.title ? + `${content.frontMatter.title} | MooseStack Documentation` + : "Templates & Apps | MooseStack Documentation", + description: + content.frontMatter.description || + "Browse templates and demo apps for MooseStack", + }; + } catch (error) { + return { + title: "Templates & Apps | MooseStack Documentation", + description: "Browse templates and demo apps for MooseStack", + }; + } +} + +export default async function TemplatesPage() { + let content; + try { + content = await parseMarkdownContent("templates/index"); + } catch (error) { + notFound(); + } + + const breadcrumbs = buildDocBreadcrumbs( + "templates/index", + typeof content.frontMatter.title === "string" ? + content.frontMatter.title + : undefined, + ); + + return ( + <> +
+ +
+ {content.isMDX ? + + :
} +
+
+ + + ); +} diff --git a/apps/framework-docs-v2/src/app/templates/templates-side-nav.tsx b/apps/framework-docs-v2/src/app/templates/templates-side-nav.tsx new file mode 100644 index 0000000000..dc0d894822 --- /dev/null +++ b/apps/framework-docs-v2/src/app/templates/templates-side-nav.tsx @@ -0,0 +1,296 @@ +"use client"; + +import * as React from "react"; +import { useSearchParams, useRouter, usePathname } from "next/navigation"; +import { + Sidebar, + SidebarContent, + SidebarGroup, + SidebarGroupLabel, + SidebarMenu, + SidebarMenuButton, + SidebarMenuItem, +} from "@/components/ui/sidebar"; +import { Checkbox } from "@/components/ui/checkbox"; +import { Label } from "@/components/ui/label"; +import { IconX } from "@tabler/icons-react"; + +type LanguageFilter = "typescript" | "python" | null; +type CategoryFilter = ("starter" | "framework" | "example")[]; +type TypeFilter = "template" | "app" | null; + +export function TemplatesSideNav() { + const router = useRouter(); + const pathname = usePathname(); + const searchParams = useSearchParams(); + + // Get filter values from URL params + const typeFilter = (searchParams.get("type") as TypeFilter) || null; + const languageFilter = + (searchParams.get("language") as LanguageFilter) || null; + const categoryFilter = React.useMemo(() => { + const categoryParam = searchParams.get("category"); + if (!categoryParam) return []; + return categoryParam + .split(",") + .filter( + (c): c is "starter" | "framework" | "example" => + c === "starter" || c === "framework" || c === "example", + ); + }, [searchParams]); + + const hasActiveFilters = + typeFilter !== null || languageFilter !== null || categoryFilter.length > 0; + + // Update URL params when filters change + const updateFilters = React.useCallback( + (updates: { + type?: TypeFilter; + language?: LanguageFilter; + category?: CategoryFilter; + }) => { + const params = new URLSearchParams(searchParams.toString()); + + if (updates.type !== undefined) { + if (updates.type === null) { + params.delete("type"); + } else { + params.set("type", updates.type); + } + } + + if (updates.language !== undefined) { + if (updates.language === null) { + params.delete("language"); + } else { + params.set("language", updates.language); + } + } + + if (updates.category !== undefined) { + if (updates.category.length === 0) { + params.delete("category"); + } else { + params.set("category", updates.category.join(",")); + } + } + + router.push(`${pathname}?${params.toString()}`); + }, + [router, pathname, searchParams], + ); + + const clearFilters = () => { + updateFilters({ type: null, language: null, category: [] }); + }; + + return ( + + + + Filters + + {/* Type Filter */} + +
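+            {/* Every filter below is read from and written to the URL (?type=, ?language=,
+                ?category=) via updateFilters, so selections survive reloads and can be
+                shared as links. Type and language are single-select; categories accumulate
+                as a comma-separated list. */}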
+ +
+
+ { + if (checked) { + updateFilters({ type: "template" }); + } else { + updateFilters({ type: null }); + } + }} + /> + +
+
+ { + if (checked) { + updateFilters({ type: "app" }); + } else { + updateFilters({ type: null }); + } + }} + /> + +
+
+
+
+ + {/* Language Filter */} + +
+ +
+
+ { + if (checked) { + updateFilters({ language: "typescript" }); + } else { + updateFilters({ language: null }); + } + }} + /> + +
+
+ { + if (checked) { + updateFilters({ language: "python" }); + } else { + updateFilters({ language: null }); + } + }} + /> + +
+
+
+
+ + {/* Category Filter */} + +
+ +
+
+ { + if (checked) { + updateFilters({ + category: [...categoryFilter, "starter"], + }); + } else { + updateFilters({ + category: categoryFilter.filter( + (c) => c !== "starter", + ), + }); + } + }} + /> + +
+
+ { + if (checked) { + updateFilters({ + category: [...categoryFilter, "framework"], + }); + } else { + updateFilters({ + category: categoryFilter.filter( + (c) => c !== "framework", + ), + }); + } + }} + /> + +
+
+ { + if (checked) { + updateFilters({ + category: [...categoryFilter, "example"], + }); + } else { + updateFilters({ + category: categoryFilter.filter( + (c) => c !== "example", + ), + }); + } + }} + /> + +
+
+
+
+ + {/* Clear Filters Button */} + {hasActiveFilters && ( + + + + Clear Filters + + + )} +
+
+
+
+ ); +} diff --git a/apps/framework-docs-v2/src/components/guides/dynamic-guide-builder.tsx b/apps/framework-docs-v2/src/components/guides/dynamic-guide-builder.tsx new file mode 100644 index 0000000000..0b4b4b19e1 --- /dev/null +++ b/apps/framework-docs-v2/src/components/guides/dynamic-guide-builder.tsx @@ -0,0 +1,17 @@ +"use client"; + +import React from "react"; +import { GuideForm } from "./guide-form"; +import { GuideManifest } from "@/lib/guide-types"; + +interface DynamicGuideBuilderProps { + manifest: GuideManifest; +} + +export function DynamicGuideBuilder({ manifest }: DynamicGuideBuilderProps) { + return ( +
+ +
+ ); +} diff --git a/apps/framework-docs-v2/src/components/guides/guide-form.tsx b/apps/framework-docs-v2/src/components/guides/guide-form.tsx new file mode 100644 index 0000000000..1849a6a6f5 --- /dev/null +++ b/apps/framework-docs-v2/src/components/guides/guide-form.tsx @@ -0,0 +1,213 @@ +"use client"; + +import * as React from "react"; +import { useForm } from "@tanstack/react-form"; +import { useRouter, useSearchParams, usePathname } from "next/navigation"; +import { z } from "zod"; +import { + Select, + SelectContent, + SelectItem, + SelectTrigger, + SelectValue, +} from "@/components/ui/select"; +import { Label } from "@/components/ui/label"; +import { Button } from "@/components/ui/button"; +import { GuideManifest } from "@/lib/guide-types"; +import { useLanguage } from "@/hooks/use-language"; + +interface GuideFormProps { + manifest: GuideManifest; +} + +export function GuideForm({ manifest }: GuideFormProps) { + const router = useRouter(); + const pathname = usePathname(); + const searchParams = useSearchParams(); + const { language } = useLanguage(); + + // Create a dynamic schema based on the manifest options + const schema = React.useMemo(() => { + const shape: Record = {}; + manifest.options.forEach((option) => { + shape[option.id] = z.string().min(1, "Selection is required"); + }); + return z.object(shape); + }, [manifest]); + + // Initialize default values from URL search params, manifest defaults, or language preference + const defaultValues = React.useMemo(() => { + const values: Record = {}; + manifest.options.forEach((option) => { + const paramValue = searchParams.get(option.id); + if (paramValue) { + values[option.id] = paramValue; + } else if (option.id === "lang") { + // For language, use the global language preference (from localStorage or default to typescript) + values[option.id] = language === "typescript" ? "ts" : "python"; + } else if (option.defaultValue) { + values[option.id] = option.defaultValue; + } else { + // Default to first value if available, to ensure we have a valid state if possible + if (option.values.length > 0 && option.values[0]) { + values[option.id] = option.values[0].id; + } + } + }); + return values; + }, [manifest, searchParams, language]); + + const form = useForm({ + defaultValues, + onSubmit: async ({ value }) => { + // Validate before submitting + const result = schema.safeParse(value); + if (!result.success) { + console.error("Form validation failed:", result.error); + return; + } + // Update URL with new values, preserving lang param + const params = new URLSearchParams(searchParams.toString()); + Object.entries(value).forEach(([key, val]) => { + if (val) { + params.set(key, val as string); + } else { + params.delete(key); + } + }); + router.push(`${pathname}?${params.toString()}`, { scroll: false }); + }, + }); + + // Update form when guide-related searchParams change + React.useEffect(() => { + const currentValues: Record = {}; + let hasGuideParams = false; + + manifest.options.forEach((option) => { + const paramValue = searchParams.get(option.id); + if (paramValue) { + currentValues[option.id] = paramValue; + hasGuideParams = true; + } else if (option.id === "lang") { + // Sync language from global preference if not in URL + currentValues[option.id] = language === "typescript" ? 
"ts" : "python"; + hasGuideParams = true; + } + }); + + // Only update if we have guide params and they differ from form state + if (hasGuideParams) { + const needsUpdate = manifest.options.some((option) => { + const currentValue = currentValues[option.id]; + const formValue = form.state.values[option.id] as string | undefined; + return currentValue && currentValue !== formValue; + }); + + if (needsUpdate) { + Object.entries(currentValues).forEach(([key, value]) => { + form.setFieldValue(key, value); + }); + } + } + }, [searchParams, manifest, language]); + + return ( +
+
+

Customize Your Guide

+

+ Select your stack preferences to get a tailored guide. +

+
+ +
{ + e.preventDefault(); + e.stopPropagation(); + form.handleSubmit(); + }} + className="grid gap-6 md:grid-cols-2 lg:grid-cols-3" + > + {manifest.options.map((option) => ( + { + // Handle language option specially - sync with global language preference + const handleLanguageChange = (value: string) => { + field.handleChange(value); + // Map guide form language values to URL lang param + const langParam = value === "ts" ? "typescript" : "python"; + const params = new URLSearchParams(searchParams.toString()); + params.set("lang", langParam); + // Preserve all other guide params + manifest.options.forEach((opt) => { + if (opt.id !== option.id && form.state.values[opt.id]) { + params.set(opt.id, form.state.values[opt.id] as string); + } + }); + router.push(`${pathname}?${params.toString()}`, { + scroll: false, + }); + }; + + const handleOtherChange = (value: string) => { + field.handleChange(value); + // Update URL directly without going through form submit to avoid loops + const params = new URLSearchParams(searchParams.toString()); + params.set(option.id, value); + // Preserve all other guide params (including lang) + manifest.options.forEach((opt) => { + if (opt.id !== option.id && form.state.values[opt.id]) { + params.set(opt.id, form.state.values[opt.id] as string); + } + }); + // Preserve lang param + const langParam = searchParams.get("lang"); + if (langParam) { + params.set("lang", langParam); + } + router.push(`${pathname}?${params.toString()}`, { + scroll: false, + }); + }; + + return ( +
+ + + {( + field.state.meta.isTouched && field.state.meta.errors.length + ) ? +

+ {field.state.meta.errors.join(", ")} +

+ : null} +
+ ); + }} + /> + ))} + +
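+            {/* The change handlers above write selections straight into the URL: the
+                language field maps ts/python onto the shared ?lang= param so it stays in
+                sync with the global language toggle, while every other option uses its own
+                query param. The server page re-reads those params to build the guide. */}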
+ ); +} diff --git a/apps/framework-docs-v2/src/components/guides/guide-steps-nav-buttons.tsx b/apps/framework-docs-v2/src/components/guides/guide-steps-nav-buttons.tsx new file mode 100644 index 0000000000..dc810aa1af --- /dev/null +++ b/apps/framework-docs-v2/src/components/guides/guide-steps-nav-buttons.tsx @@ -0,0 +1,47 @@ +"use client"; + +import * as React from "react"; +import { Button } from "@/components/ui/button"; +import { IconChevronLeft, IconChevronRight } from "@tabler/icons-react"; + +interface GuideStepsNavButtonsProps { + currentStepIndex: number; + totalSteps: number; + onPrevious: () => void; + onNext: () => void; +} + +export function GuideStepsNavButtons({ + currentStepIndex, + totalSteps, + onPrevious, + onNext, +}: GuideStepsNavButtonsProps) { + const hasPrevious = currentStepIndex > 0; + const hasNext = currentStepIndex < totalSteps - 1; + + return ( +
+ + +
+ ); +} diff --git a/apps/framework-docs-v2/src/components/guides/guide-steps-nav.tsx b/apps/framework-docs-v2/src/components/guides/guide-steps-nav.tsx new file mode 100644 index 0000000000..c21d443733 --- /dev/null +++ b/apps/framework-docs-v2/src/components/guides/guide-steps-nav.tsx @@ -0,0 +1,190 @@ +"use client"; + +import * as React from "react"; +import { usePathname, useSearchParams } from "next/navigation"; +import Link from "next/link"; +import { IconChevronLeft, IconChevronRight } from "@tabler/icons-react"; +import { Button } from "@/components/ui/button"; +import { Badge } from "@/components/ui/badge"; +import { useLanguage } from "@/hooks/use-language"; + +interface Step { + slug: string; + stepNumber: number; + title: string; +} + +interface GuideStepsNavProps { + steps: Step[]; + currentSlug: string; + children?: React.ReactNode; +} + +export function GuideStepsNav({ + steps, + currentSlug, + children, +}: GuideStepsNavProps) { + const pathname = usePathname(); + const searchParams = useSearchParams(); + const { language } = useLanguage(); + const [currentStepIndex, setCurrentStepIndex] = React.useState(0); + + // Determine current step from URL hash or default to first step + React.useEffect(() => { + const hash = window.location.hash; + if (hash) { + const stepMatch = hash.match(/step-(\d+)/); + if (stepMatch) { + const stepNum = parseInt(stepMatch[1]!, 10); + const index = steps.findIndex((s) => s.stepNumber === stepNum); + if (index >= 0) { + setCurrentStepIndex(index); + } + } + } + }, [steps]); + + // Update URL hash and show/hide steps when step changes + React.useEffect(() => { + if (steps.length > 0 && currentStepIndex < steps.length) { + const currentStep = steps[currentStepIndex]; + if (currentStep) { + const hasPrevious = currentStepIndex > 0; + const hasNext = currentStepIndex < steps.length - 1; + + // Update URL hash + window.history.replaceState( + null, + "", + `${pathname}${searchParams.toString() ? 
`?${searchParams.toString()}` : ""}#step-${currentStep.stepNumber}`, + ); + + // Show/hide step content + const stepContents = document.querySelectorAll(".step-content"); + stepContents.forEach((content, index) => { + if (index === currentStepIndex) { + content.classList.remove("hidden"); + content.classList.add("block"); + } else { + content.classList.add("hidden"); + content.classList.remove("block"); + } + }); + + // Update card header with current step info + const cardTitle = document.querySelector(".step-card-title"); + const cardBadge = document.querySelector(".step-card-badge"); + const buttonsContainer = document.getElementById( + "step-nav-buttons-container", + ); + if (cardTitle) cardTitle.textContent = currentStep.title; + if (cardBadge) + cardBadge.textContent = currentStep.stepNumber.toString(); + + // Update navigation buttons + if (buttonsContainer) { + buttonsContainer.innerHTML = ` + + + `; + } + } + } + }, [currentStepIndex, steps, pathname, searchParams]); + + if (steps.length === 0) return null; + + const currentStep = steps[currentStepIndex]; + const hasPrevious = currentStepIndex > 0; + const hasNext = currentStepIndex < steps.length - 1; + + const goToStep = (index: number) => { + if (index >= 0 && index < steps.length) { + setCurrentStepIndex(index); + // Scroll to top of steps section + const element = document.getElementById("guide-steps"); + if (element) { + element.scrollIntoView({ behavior: "smooth", block: "start" }); + } + } + }; + + // Expose goToStep to window for button onclick handlers + React.useEffect(() => { + (window as any).__goToStep = goToStep; + return () => { + delete (window as any).__goToStep; + }; + }, [goToStep]); + + const buildUrl = (stepSlug: string) => { + const params = new URLSearchParams(searchParams.toString()); + params.set("lang", language); + return `/${stepSlug}?${params.toString()}`; + }; + + return ( + <> +
+
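+        {/* The step bodies themselves are server-rendered by GuideStepsWrapper; this client
+            component only toggles the .step-content wrappers and rewrites the
+            #step-nav-buttons-container buttons (which call window.__goToStep), so switching
+            steps never re-renders the MDX. */}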

Implementation Steps

+
+ {steps.map((step, index) => ( + + ))} +
+
+ + {children} + + {/* Step list for navigation */} +
+

All Steps

+
+ {steps.map((step, index) => ( + { + e.preventDefault(); + goToStep(index); + }} + className={`flex items-center gap-3 rounded-md px-3 py-2 text-sm transition-colors ${ + index === currentStepIndex ? + "bg-accent text-accent-foreground" + : "hover:bg-accent/50" + }`} + > + + {step.stepNumber} + + {step.title} + + ))} +
+
+ + ); +} diff --git a/apps/framework-docs-v2/src/components/guides/guide-steps-wrapper.tsx b/apps/framework-docs-v2/src/components/guides/guide-steps-wrapper.tsx new file mode 100644 index 0000000000..6d24116b76 --- /dev/null +++ b/apps/framework-docs-v2/src/components/guides/guide-steps-wrapper.tsx @@ -0,0 +1,98 @@ +import { Suspense } from "react"; +import { GuideStepsNav } from "./guide-steps-nav"; +import { StepContent } from "./step-content"; +import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card"; +import { Badge } from "@/components/ui/badge"; + +interface GuideStepsWrapperProps { + steps: Array<{ + slug: string; + stepNumber: number; + title: string; + }>; + stepsWithContent: Array<{ + slug: string; + stepNumber: number; + title: string; + content: string | null; + isMDX: boolean; + }>; + currentSlug: string; +} + +async function StepContentWrapper({ + content, + isMDX, + slug, + index, +}: { + content: string; + isMDX: boolean; + slug: string; + index: number; +}) { + return ( +
+ +
+ ); +} + +export function GuideStepsWrapper({ + steps, + stepsWithContent, + currentSlug, +}: GuideStepsWrapperProps) { + // Render all step content with Suspense for async MDX rendering + const renderedSteps = stepsWithContent.map((step, index) => { + if (!step.content) return null; + return ( + +
Loading step content...
+
+ } + > + + + ); + }); + + return ( +
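+      {/* Every step is pre-rendered here; GuideStepsNav (client) only shows or hides the
+          .step-content wrappers, so the MDX for each step is produced exactly once on the
+          server. */}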
+ + + +
+
+ + {steps[0]?.stepNumber || 1} + + + {steps[0]?.title || "Step 1"} + +
+
+
+
+ +
{renderedSteps}
+
+
+
+ ); +} diff --git a/apps/framework-docs-v2/src/components/guides/guide-steps.tsx b/apps/framework-docs-v2/src/components/guides/guide-steps.tsx new file mode 100644 index 0000000000..c32fbfe46b --- /dev/null +++ b/apps/framework-docs-v2/src/components/guides/guide-steps.tsx @@ -0,0 +1,185 @@ +"use client"; + +import * as React from "react"; +import { usePathname, useSearchParams } from "next/navigation"; +import Link from "next/link"; +import { IconChevronLeft, IconChevronRight } from "@tabler/icons-react"; +import { Button } from "@/components/ui/button"; +import { Badge } from "@/components/ui/badge"; +import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card"; +import { useLanguage } from "@/hooks/use-language"; + +interface Step { + slug: string; + stepNumber: number; + title: string; +} + +interface GuideStepsProps { + steps: Step[]; + renderedSteps: React.ReactElement[]; + currentSlug: string; +} + +export function GuideSteps({ + steps, + renderedSteps, + currentSlug, +}: GuideStepsProps) { + const pathname = usePathname(); + const searchParams = useSearchParams(); + const { language } = useLanguage(); + const [currentStepIndex, setCurrentStepIndex] = React.useState(0); + + // Determine current step from URL hash or default to first step + React.useEffect(() => { + const hash = window.location.hash; + if (hash) { + const stepMatch = hash.match(/step-(\d+)/); + if (stepMatch) { + const stepNum = parseInt(stepMatch[1]!, 10); + const index = steps.findIndex((s) => s.stepNumber === stepNum); + if (index >= 0) { + setCurrentStepIndex(index); + } + } + } + }, [steps]); + + // Update URL hash when step changes + React.useEffect(() => { + if (steps.length > 0 && currentStepIndex < steps.length) { + const currentStep = steps[currentStepIndex]; + if (currentStep) { + window.history.replaceState( + null, + "", + `${pathname}${searchParams.toString() ? `?${searchParams.toString()}` : ""}#step-${currentStep.stepNumber}`, + ); + } + } + }, [currentStepIndex, steps, pathname, searchParams]); + + if (steps.length === 0) return null; + + const currentStep = steps[currentStepIndex]; + if (!currentStep) return null; + + const currentRenderedStep = renderedSteps[currentStepIndex]; + const hasPrevious = currentStepIndex > 0; + const hasNext = currentStepIndex < steps.length - 1; + + const goToStep = (index: number) => { + if (index >= 0 && index < steps.length) { + setCurrentStepIndex(index); + // Scroll to top of steps section + const element = document.getElementById("guide-steps"); + if (element) { + element.scrollIntoView({ behavior: "smooth", block: "start" }); + } + } + }; + + const buildUrl = (stepSlug: string) => { + const params = new URLSearchParams(searchParams.toString()); + params.set("lang", language); + return `/${stepSlug}?${params.toString()}`; + }; + + return ( +
+
+

Implementation Steps

+
+ {steps.map((step, index) => ( + + ))} +
+
+ + + +
+
+ {currentStep.stepNumber} + {currentStep.title} +
+
+ + +
+
+
+ +
+ {renderedSteps.map((stepContent, index) => ( +
+ {stepContent || ( +
+ Step content not available +
+ )} +
+ ))} +
+
+
+ + {/* Step list for navigation */} +
+

All Steps

+
+ {steps.map((step, index) => ( + { + e.preventDefault(); + goToStep(index); + }} + className={`flex items-center gap-3 rounded-md px-3 py-2 text-sm transition-colors ${ + index === currentStepIndex ? + "bg-accent text-accent-foreground" + : "hover:bg-accent/50" + }`} + > + + {step.stepNumber} + + {step.title} + + ))} +
+
+
+ ); +} diff --git a/apps/framework-docs-v2/src/components/guides/step-content.tsx b/apps/framework-docs-v2/src/components/guides/step-content.tsx new file mode 100644 index 0000000000..b64c151cfc --- /dev/null +++ b/apps/framework-docs-v2/src/components/guides/step-content.tsx @@ -0,0 +1,22 @@ +import { MDXRenderer } from "@/components/mdx-renderer"; + +interface StepContentProps { + content: string; + isMDX: boolean; +} + +export async function StepContent({ content, isMDX }: StepContentProps) { + if (!content) { + return ( +
Step content not available
+ ); + } + + return ( +
+ {isMDX ? + + :
} +
+ ); +} diff --git a/apps/framework-docs-v2/src/components/mdx-renderer.tsx b/apps/framework-docs-v2/src/components/mdx-renderer.tsx index cb30fc5550..9e7e80d5b6 100644 --- a/apps/framework-docs-v2/src/components/mdx-renderer.tsx +++ b/apps/framework-docs-v2/src/components/mdx-renderer.tsx @@ -27,40 +27,32 @@ import { Security, BreakingChanges, TemplatesGridServer, + CommandSnippet, } from "@/components/mdx"; import { FileTreeFolder, FileTreeFile } from "@/components/mdx/file-tree"; import { CodeEditor } from "@/components/ui/shadcn-io/code-editor"; import { Separator } from "@/components/ui/separator"; import { Tabs, TabsList, TabsTrigger, TabsContent } from "@/components/ui/tabs"; import { Badge } from "@/components/ui/badge"; +import { IconTerminal, IconFileCode } from "@tabler/icons-react"; import { - IconTerminal, - IconFileCode, - IconRocket, - IconDatabase, - IconDeviceLaptop, - IconBrandGithub, - IconInfoCircle, - IconCheck, - IconClock, -} from "@tabler/icons-react"; -import { - MDXPre, - MDXCode, - MDXFigure, -} from "@/components/mdx/code-block-wrapper"; -import { PathConfig } from "@/lib/path-config"; + ServerCodeBlock, + ServerInlineCode, +} from "@/components/mdx/server-code-block"; +import { ServerFigure } from "@/components/mdx/server-figure"; import Link from "next/link"; import remarkGfm from "remark-gfm"; import rehypeSlug from "rehype-slug"; import rehypeAutolinkHeadings from "rehype-autolink-headings"; import rehypePrettyCode from "rehype-pretty-code"; +import { rehypeCodeMeta } from "@/lib/rehype-code-meta"; interface MDXRendererProps { source: string; } export async function MDXRenderer({ source }: MDXRendererProps) { + "use cache"; // Create FileTree with nested components const FileTreeWithSubcomponents = Object.assign(FileTree, { Folder: FileTreeFolder, @@ -120,6 +112,7 @@ export async function MDXRenderer({ source }: MDXRendererProps) { Security, BreakingChanges, TemplatesGridServer, + CommandSnippet, CodeEditor, Separator, Tabs, @@ -132,10 +125,10 @@ export async function MDXRenderer({ source }: MDXRendererProps) { SourceCodeLink, Link, - figure: MDXFigure, - // wrap with not-prose class - pre: MDXPre, - code: MDXCode, + // Code block handling - server-side rendered + figure: ServerFigure, + pre: ServerCodeBlock, + code: ServerInlineCode, }; return ( @@ -153,10 +146,10 @@ export async function MDXRenderer({ source }: MDXRendererProps) { { theme: "github-dark", keepBackground: false, - // Keep rehype-pretty-code for now to mark code blocks, - // but our components will handle the actual rendering }, ], + // Generic plugin to extract all meta attributes as data-* props + rehypeCodeMeta, ], }, }} diff --git a/apps/framework-docs-v2/src/components/mdx/code-block-wrapper.tsx b/apps/framework-docs-v2/src/components/mdx/code-block-wrapper.tsx index 7efea85c06..76302de0f7 100644 --- a/apps/framework-docs-v2/src/components/mdx/code-block-wrapper.tsx +++ b/apps/framework-docs-v2/src/components/mdx/code-block-wrapper.tsx @@ -76,6 +76,7 @@ interface MDXCodeProps extends React.HTMLAttributes { "data-rehype-pretty-code-fragment"?: string; "data-rehype-pretty-code-title"?: string; "data-filename"?: string; + "data-copy"?: string; children?: React.ReactNode; } @@ -365,7 +366,7 @@ export function MDXPre({ children, ...props }: MDXCodeBlockProps) { props["data-rehype-pretty-code-title"] || props["data-filename"] || props["title"]; - const hasCopy = props["data-copy"] !== undefined; + const hasCopy = props["data-copy"] !== "false"; const isShell = SHELL_LANGUAGES.has(language); 
const isConfigFile = CONFIG_LANGUAGES.has(language); @@ -377,7 +378,7 @@ export function MDXPre({ children, ...props }: MDXCodeBlockProps) { code={codeText} language={language} filename={filename || undefined} - copyButton={true} + copyButton={hasCopy} />
); @@ -430,7 +431,7 @@ export function MDXPre({ children, ...props }: MDXCodeBlockProps) { code={codeText} language={language || "typescript"} filename={filename || undefined} - copyButton={true} + copyButton={hasCopy} /> ); @@ -453,7 +454,7 @@ export function MDXPre({ children, ...props }: MDXCodeBlockProps) { props["data-rehype-pretty-code-title"] || props["data-filename"] || props["title"]; // Also check for title prop directly - const hasCopy = props["data-copy"] !== undefined; + const hasCopy = props["data-copy"] !== "false"; const isShell = SHELL_LANGUAGES.has(language); const isConfigFile = CONFIG_LANGUAGES.has(language); @@ -473,7 +474,7 @@ export function MDXPre({ children, ...props }: MDXCodeBlockProps) { code={codeText} language={language} filename={filename || undefined} - copyButton={true} + copyButton={hasCopy} /> ); @@ -506,7 +507,7 @@ export function MDXPre({ children, ...props }: MDXCodeBlockProps) { } // If filename is provided and no copy attribute, use animated CodeEditor - if (filename && !hasCopy) { + if (filename && props["data-copy"] === undefined) { // Determine if this is a terminal based on language const isTerminalLang = SHELL_LANGUAGES.has(language); return ( @@ -531,7 +532,7 @@ export function MDXPre({ children, ...props }: MDXCodeBlockProps) { code={codeText} language={language || "typescript"} filename={filename || undefined} - copyButton={true} + copyButton={hasCopy} /> ); @@ -543,9 +544,15 @@ export function MDXCode({ children, className, ...props }: MDXCodeProps) { const isInline = !className?.includes("language-") && !props["data-language"]; if (isInline) { - // Inline code - render as normal code element + // Inline code - render as normal code element with proper styling return ( - + {children} ); @@ -572,6 +579,7 @@ export function MDXCode({ children, className, ...props }: MDXCodeProps) { // Config files use CodeSnippet const filename = props["data-rehype-pretty-code-title"] || props["data-filename"]; + const hasCopy = props["data-copy"] !== "false"; return (
@@ -579,19 +587,20 @@ export function MDXCode({ children, className, ...props }: MDXCodeProps) { code={codeText} language={language} filename={filename} - copyButton={true} + copyButton={hasCopy} />
); } // Default to CodeSnippet for editable code blocks + const hasCopy = props["data-copy"] !== "false"; return (
); diff --git a/apps/framework-docs-v2/src/components/mdx/code-snippet.tsx b/apps/framework-docs-v2/src/components/mdx/code-snippet.tsx index 3316156f11..538a48d884 100644 --- a/apps/framework-docs-v2/src/components/mdx/code-snippet.tsx +++ b/apps/framework-docs-v2/src/components/mdx/code-snippet.tsx @@ -11,12 +11,23 @@ import { CodeBlockContent, } from "@/components/ui/shadcn-io/code-block"; +/** + * Parsed substring highlight with optional occurrence filter + */ +interface SubstringHighlight { + pattern: string; + occurrences?: number[]; +} + interface CodeSnippetProps { code: string; language?: string; filename?: string; copyButton?: boolean; lineNumbers?: boolean; + highlightLines?: number[]; + highlightStrings?: SubstringHighlight[]; + isAnsi?: boolean; className?: string; } @@ -59,14 +70,299 @@ function CopyButton({ ); } +/** + * Parse ANSI escape codes and convert to styled HTML + */ +function parseAnsi(text: string): string { + const colors: Record = { + 30: "color: #000", + 31: "color: #c00", + 32: "color: #0a0", + 33: "color: #a50", + 34: "color: #00a", + 35: "color: #a0a", + 36: "color: #0aa", + 37: "color: #aaa", + 90: "color: #555", + 91: "color: #f55", + 92: "color: #5f5", + 93: "color: #ff5", + 94: "color: #55f", + 95: "color: #f5f", + 96: "color: #5ff", + 97: "color: #fff", + }; + + const bgColors: Record = { + 40: "background-color: #000", + 41: "background-color: #c00", + 42: "background-color: #0a0", + 43: "background-color: #a50", + 44: "background-color: #00a", + 45: "background-color: #a0a", + 46: "background-color: #0aa", + 47: "background-color: #aaa", + 100: "background-color: #555", + 101: "background-color: #f55", + 102: "background-color: #5f5", + 103: "background-color: #ff5", + 104: "background-color: #55f", + 105: "background-color: #f5f", + 106: "background-color: #5ff", + 107: "background-color: #fff", + }; + + // biome-ignore lint/complexity/useRegexLiterals: Using constructor to avoid control character lint error + const ansiPattern = new RegExp("\\x1b\\[([0-9;]*)m", "g"); + let result = ""; + let lastIndex = 0; + let currentStyles: string[] = []; + + let match = ansiPattern.exec(text); + while (match !== null) { + const textBefore = text.slice(lastIndex, match.index); + if (textBefore) { + const escapedText = textBefore + .replace(/&/g, "&") + .replace(//g, ">"); + + if (currentStyles.length > 0) { + result += `${escapedText}`; + } else { + result += escapedText; + } + } + + const codes = match[1] ? 
match[1].split(";").map(Number) : [0]; + + for (const code of codes) { + if (code === 0) { + currentStyles = []; + } else if (code === 1) { + currentStyles.push("font-weight: bold"); + } else if (code === 2) { + currentStyles.push("opacity: 0.75"); + } else if (code === 3) { + currentStyles.push("font-style: italic"); + } else if (code === 4) { + currentStyles.push("text-decoration: underline"); + } else if (code === 9) { + currentStyles.push("text-decoration: line-through"); + } else if (colors[code]) { + currentStyles.push(colors[code]); + } else if (bgColors[code]) { + currentStyles.push(bgColors[code]); + } + } + + lastIndex = ansiPattern.lastIndex; + match = ansiPattern.exec(text); + } + + const remainingText = text.slice(lastIndex); + if (remainingText) { + const escapedText = remainingText + .replace(/&/g, "&") + .replace(//g, ">"); + + if (currentStyles.length > 0) { + result += `${escapedText}`; + } else { + result += escapedText; + } + } + + return result; +} + +/** + * Custom CodeBlockContent that supports line and substring highlighting + */ +function HighlightedCodeBlockContent({ + code, + language, + highlightLines, + highlightStrings, +}: { + code: string; + language: string; + highlightLines: number[]; + highlightStrings: SubstringHighlight[]; +}) { + const [highlightedCode, setHighlightedCode] = React.useState(""); + const [isLoading, setIsLoading] = React.useState(true); + + React.useEffect(() => { + const loadHighlightedCode = async () => { + try { + const { codeToHtml } = await import("shiki"); + + const languageMap: Record = { + gitignore: "text", + env: "text", + dotenv: "text", + }; + const mappedLanguage = languageMap[language.toLowerCase()] || language; + + const html = await codeToHtml(code, { + lang: mappedLanguage, + themes: { + light: "vitesse-light", + dark: "vitesse-dark", + }, + transformers: [ + { + line(node, line) { + // Add highlighted class to specified lines + if (highlightLines.includes(line)) { + this.addClassToHast(node, "highlighted"); + } + }, + }, + ], + }); + + // Apply substring highlighting if needed + let finalHtml = html; + if (highlightStrings.length > 0) { + finalHtml = applySubstringHighlighting(html, highlightStrings); + } + + setHighlightedCode(finalHtml); + setIsLoading(false); + } catch { + // Fallback + try { + const { codeToHtml } = await import("shiki"); + const html = await codeToHtml(code, { + lang: "text", + themes: { + light: "vitesse-light", + dark: "vitesse-dark", + }, + }); + setHighlightedCode(html); + } catch { + const lines = code.split("\n"); + const html = `
${lines.map((line) => `${line.replace(//g, ">")}`).join("\n")}
`; + setHighlightedCode(html); + } + setIsLoading(false); + } + }; + + loadHighlightedCode(); + }, [code, language, highlightLines, highlightStrings]); + + if (isLoading) { + return ( +
+        
+          {code.split("\n").map((line, i) => (
+            // biome-ignore lint/suspicious/noArrayIndexKey: Static code lines have no unique ID
+            
+              {line}
+            
+          ))}
+        
+      
+ ); + } + + return ( +
+ ); +} + +function escapeRegExp(string: string): string { + return string.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); +} + +function applySubstringHighlighting( + html: string, + highlightStrings: SubstringHighlight[], +): string { + let result = html; + + for (const { pattern, occurrences } of highlightStrings) { + const escapedPattern = escapeRegExp(pattern); + let occurrenceCount = 0; + + // Replace pattern occurrences, respecting occurrence filter + result = result.replace( + new RegExp(`(?<=>)([^<]*?)${escapedPattern}`, "g"), + (match, prefix) => { + occurrenceCount++; + const shouldHighlight = + !occurrences || occurrences.includes(occurrenceCount); + + if (shouldHighlight) { + return `>${prefix}${pattern}`; + } + return match; + }, + ); + } + + return result; +} + export function CodeSnippet({ code, language = "typescript", filename, copyButton = true, lineNumbers = true, + highlightLines = [], + highlightStrings = [], + isAnsi = false, className, }: CodeSnippetProps) { + // For ANSI blocks, render with ANSI parsing + if (isAnsi) { + const lines = code.split("\n"); + return ( +
+ {copyButton && } + {filename && ( +
+ {filename} +
+ )} +
+
+            
+              {lines.map((line, i) => (
+                // biome-ignore lint/suspicious/noArrayIndexKey: Static code lines have no unique ID
+                
+                  
+                
+              ))}
+            
+          
+
+
+ ); + } + + // Check if we need custom highlighting + const needsCustomHighlighting = + highlightLines.length > 0 || highlightStrings.length > 0; + return (
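+      {/* When highlightLines or highlightStrings are supplied, the code is re-highlighted
+          client-side by HighlightedCodeBlockContent (shiki with a line transformer plus the
+          substring pass); otherwise the stock CodeBlockContent renders item.code unchanged. */}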
- - {item.code} - + {needsCustomHighlighting ? + + : + {item.code} + + } )} diff --git a/apps/framework-docs-v2/src/components/mdx/command-snippet.tsx b/apps/framework-docs-v2/src/components/mdx/command-snippet.tsx new file mode 100644 index 0000000000..b43cbaf35b --- /dev/null +++ b/apps/framework-docs-v2/src/components/mdx/command-snippet.tsx @@ -0,0 +1,42 @@ +"use client"; + +import * as React from "react"; +import { + Snippet, + SnippetHeader, + SnippetTabsList, + SnippetTabsTrigger, + SnippetTabsContent, + SnippetCopyButton, +} from "@/components/ui/snippet"; + +interface CommandSnippetProps { + initCommand?: string; + listCommand?: string; + initLabel?: string; + listLabel?: string; +} + +export function CommandSnippet({ + initCommand = "moose init PROJECT_NAME TEMPLATE_NAME", + listCommand = "moose template list", + initLabel = "Init", + listLabel = "List", +}: CommandSnippetProps) { + const [value, setValue] = React.useState("init"); + const currentCommand = value === "init" ? initCommand : listCommand; + + return ( + + + + {initLabel} + {listLabel} + + + + {initCommand} + {listCommand} + + ); +} diff --git a/apps/framework-docs-v2/src/components/mdx/file-tree.tsx b/apps/framework-docs-v2/src/components/mdx/file-tree.tsx index 52343f95cd..36d9602df2 100644 --- a/apps/framework-docs-v2/src/components/mdx/file-tree.tsx +++ b/apps/framework-docs-v2/src/components/mdx/file-tree.tsx @@ -1,37 +1,128 @@ "use client"; -import React from "react"; +import * as React from "react"; +import { IconChevronRight, IconFile, IconFolder } from "@tabler/icons-react"; +import { + Collapsible, + CollapsibleContent, + CollapsibleTrigger, +} from "@/components/ui/collapsible"; +import { cn } from "@/lib/utils"; + +// ============================================================================ +// FileTree Root +// ============================================================================ interface FileTreeProps { children: React.ReactNode; + className?: string; } +/** + * FileTree component for MDX documentation + * + * Usage in MDX: + * ```mdx + * + * + * + * + * + * + * + * + * ``` + */ +export function FileTree({ children, className }: FileTreeProps) { + return ( +
+
    {children}
+
+ ); +} + +// ============================================================================ +// FileTreeFolder +// ============================================================================ + interface FileTreeFolderProps { name: string; children?: React.ReactNode; + defaultOpen?: boolean; } -interface FileTreeFileProps { - name: string; +export function FileTreeFolder({ + name, + children, + defaultOpen = true, +}: FileTreeFolderProps) { + return ( +
+ + + + + +
      + {children} +
    +
    +
    +
  • + ); } -export function FileTree({ children }: FileTreeProps) { - return
    {children}
    ; +// ============================================================================ +// FileTreeFile +// ============================================================================ + +interface FileTreeFileProps { + name: string; } -export function FileTreeFolder({ name, children }: FileTreeFolderProps) { +export function FileTreeFile({ name }: FileTreeFileProps) { return ( -
    -
    {name}/
    -
    {children}
    -
    +
+
    svg]:size-4 [&>svg]:shrink-0", + )} + > + + {name} +
    +
  • ); } -export function FileTreeFile({ name }: FileTreeFileProps) { - return
    {name}
    ; -} +// ============================================================================ +// Attach sub-components for dot notation +// ============================================================================ -// Attach sub-components to FileTree for nested usage -(FileTree as any).Folder = FileTreeFolder; -(FileTree as any).File = FileTreeFile; +FileTree.Folder = FileTreeFolder; +FileTree.File = FileTreeFile; diff --git a/apps/framework-docs-v2/src/components/mdx/index.ts b/apps/framework-docs-v2/src/components/mdx/index.ts index ebdb8480cf..5320abeeac 100644 --- a/apps/framework-docs-v2/src/components/mdx/index.ts +++ b/apps/framework-docs-v2/src/components/mdx/index.ts @@ -8,8 +8,12 @@ export { } from "./staggered-card"; export { Callout } from "./callout"; export { LanguageTabs, LanguageTabContent } from "./language-tabs"; +export { CommandSnippet } from "./command-snippet"; export { CodeSnippet } from "./code-snippet"; export { CodeEditorWrapper } from "./code-editor-wrapper"; +export { ShellSnippet } from "./shell-snippet"; +export { ServerCodeBlock, ServerInlineCode } from "./server-code-block"; +export { ServerFigure } from "./server-figure"; export { ToggleBlock } from "./toggle-block"; export { BulletPointsCard, diff --git a/apps/framework-docs-v2/src/components/mdx/inline-code.tsx b/apps/framework-docs-v2/src/components/mdx/inline-code.tsx new file mode 100644 index 0000000000..374c8a4f49 --- /dev/null +++ b/apps/framework-docs-v2/src/components/mdx/inline-code.tsx @@ -0,0 +1,87 @@ +"use client"; + +import * as React from "react"; +import { cn } from "@/lib/utils"; + +interface InlineCodeProps { + code: string; + language: string; + className?: string; +} + +const darkModeStyles = cn( + "dark:[&_.shiki]:!text-[var(--shiki-dark)]", + "dark:[&_.shiki_span]:!text-[var(--shiki-dark)]", +); + +/** + * Inline code with syntax highlighting + * Used for the Nextra-style `code{:lang}` syntax + */ +export function InlineCode({ code, language, className }: InlineCodeProps) { + const [highlightedCode, setHighlightedCode] = React.useState(""); + const [isLoading, setIsLoading] = React.useState(true); + + React.useEffect(() => { + const loadHighlightedCode = async () => { + try { + const { codeToHtml } = await import("shiki"); + + const html = await codeToHtml(code, { + lang: language, + themes: { + light: "vitesse-light", + dark: "vitesse-dark", + }, + }); + + // Extract just the code content, removing the pre/code wrapper + // The output is usually:
    ...
    + const match = html.match(/]*>([\s\S]*)<\/code>/); + if (match?.[1]) { + // Remove the line span wrapper for inline display + const content = match[1].replace( + /([\s\S]*?)<\/span>/g, + "$1", + ); + setHighlightedCode(content); + } else { + setHighlightedCode(code); + } + setIsLoading(false); + } catch { + // Fallback to plain text + setHighlightedCode(code); + setIsLoading(false); + } + }; + + loadHighlightedCode(); + }, [code, language]); + + if (isLoading) { + return ( + + {code} + + ); + } + + return ( + + ); +} diff --git a/apps/framework-docs-v2/src/components/mdx/server-code-block.tsx b/apps/framework-docs-v2/src/components/mdx/server-code-block.tsx new file mode 100644 index 0000000000..bf523147a7 --- /dev/null +++ b/apps/framework-docs-v2/src/components/mdx/server-code-block.tsx @@ -0,0 +1,441 @@ +import React from "react"; +import { cn } from "@/lib/utils"; +import { CodeSnippet } from "./code-snippet"; +import { CodeEditorWrapper } from "./code-editor-wrapper"; +import { ShellSnippet } from "./shell-snippet"; +import { InlineCode } from "./inline-code"; +import { extractTextContent } from "@/lib/extract-text-content"; + +// Shell languages that should use terminal styling +const SHELL_LANGUAGES = new Set([ + "bash", + "sh", + "shell", + "zsh", + "fish", + "powershell", + "cmd", +]); + +// Config/data file languages that should always use static CodeSnippet +const CONFIG_LANGUAGES = new Set([ + "toml", + "yaml", + "yml", + "json", + "jsonc", + "ini", + "properties", + "config", +]); + +/** + * Parsed substring highlight with optional occurrence filter + */ +interface SubstringHighlight { + pattern: string; + occurrences?: number[]; +} + +/** + * Props interface for server-side code block + * All data-* attributes from markdown are available here + */ +export interface ServerCodeBlockProps + extends React.HTMLAttributes { + // Standard rehype-pretty-code attributes + "data-language"?: string; + "data-theme"?: string; + "data-rehype-pretty-code-fragment"?: string; + "data-rehype-pretty-code-title"?: string; + + // Custom attributes from markdown meta + "data-filename"?: string; + "data-copy"?: string; + "data-variant"?: string; + "data-duration"?: string; + "data-delay"?: string; + "data-writing"?: string; + "data-linenumbers"?: string; + "data-showlinenumbers"?: string; + + // Line and substring highlighting (Nextra-style) + "data-highlight-lines"?: string; + "data-highlight-strings"?: string; + + // Animation flag (Nextra extension) + "data-animate"?: string; + + children?: React.ReactNode; +} + +/** + * Extracts the language from data attributes or className + */ +function getLanguage(props: ServerCodeBlockProps): string { + const dataLang = props["data-language"]; + if (dataLang) { + return dataLang.toLowerCase(); + } + + if (typeof props.className === "string") { + const match = props.className.match(/language-(\w+)/); + if (match?.[1]) { + return match[1].toLowerCase(); + } + } + + return ""; +} + +/** + * Find the code element in children + */ +function findCodeElement( + node: React.ReactNode, + depth = 0, +): React.ReactElement | undefined { + if (depth > 10) return undefined; + + if (Array.isArray(node)) { + for (const item of node) { + const found = findCodeElement(item, depth + 1); + if (found) return found; + } + return undefined; + } + + if (!React.isValidElement(node)) return undefined; + + const nodeType = node.type; + const nodeProps = (node.props as Record) || {}; + + if (nodeType === React.Fragment && nodeProps.children) { + return 
findCodeElement(nodeProps.children as React.ReactNode, depth + 1); + } + + if (typeof nodeType === "string" && nodeType === "code") { + return node; + } + + if (nodeProps.children) { + return findCodeElement(nodeProps.children as React.ReactNode, depth + 1); + } + + return undefined; +} + +/** + * Parse line highlight specification into array of line numbers + * Handles: "1", "1,4-5", "1-3,7,9-11" + */ +function parseLineHighlights(spec: string | undefined): number[] { + if (!spec) return []; + + const lines: number[] = []; + const parts = spec.split(","); + + for (const part of parts) { + const trimmed = part.trim(); + if (trimmed.includes("-")) { + const [start, end] = trimmed.split("-").map((n) => parseInt(n, 10)); + if ( + start !== undefined && + end !== undefined && + !isNaN(start) && + !isNaN(end) + ) { + for (let i = start; i <= end; i++) { + lines.push(i); + } + } + } else { + const num = parseInt(trimmed, 10); + if (!isNaN(num)) { + lines.push(num); + } + } + } + + return lines; +} + +/** + * Parse substring highlights from JSON string + */ +function parseSubstringHighlights( + jsonStr: string | undefined, +): SubstringHighlight[] { + if (!jsonStr) return []; + + try { + return JSON.parse(jsonStr) as SubstringHighlight[]; + } catch { + return []; + } +} + +/** + * Server-side code block component + * + * Extracts all code block attributes and routes to the appropriate + * client-side component based on language and attributes. + * + * Supports Nextra-style syntax: + * - ```js {1,4-5} → Line highlighting + * - ```js /useState/ → Substring highlighting + * - ```js copy → Copy button + * - ```js showLineNumbers→ Line numbers + * - ```js filename="x" → File header + * - ```js animate → Animated typing effect + */ +export function ServerCodeBlock({ + children, + ...props +}: ServerCodeBlockProps): React.ReactElement { + // Check if this is a code block processed by rehype-pretty-code + const isCodeBlock = props["data-rehype-pretty-code-fragment"] !== undefined; + + if (!isCodeBlock) { + // Not a code block, render as regular pre element + const { className, ...restProps } = props; + return ( +
    +        {children}
    +      
    + ); + } + + // Extract code content + const codeElement = findCodeElement(children); + const codeText = + codeElement ? + extractTextContent( + (codeElement.props as Record) + .children as React.ReactNode, + ).trim() + : extractTextContent(children).trim(); + + // Extract all attributes (supports multiple sources for backwards compat) + const language = getLanguage(props); + + // Filename: check title (from rehype-pretty-code), filename, or direct title + const filename = + props["data-rehype-pretty-code-title"] || + props["data-filename"] || + ((props as Record)["title"] as string | undefined); + + // Copy button: defaults to true unless explicitly set to "false" + const showCopy = props["data-copy"] !== "false"; + + // Variant: "terminal" or "ide" + const variant = props["data-variant"] as "terminal" | "ide" | undefined; + + // Animation settings - explicit animate flag takes precedence + const animateFlag = props["data-animate"]; + const shouldAnimate = animateFlag === "true"; + const shouldNotAnimate = animateFlag === "false"; + + const duration = + props["data-duration"] ? parseFloat(props["data-duration"]) : undefined; + const delay = + props["data-delay"] ? parseFloat(props["data-delay"]) : undefined; + const writing = props["data-writing"] !== "false"; + + // Line numbers: support both linenumbers and showlinenumbers + const lineNumbersFlag = + props["data-showlinenumbers"] ?? props["data-linenumbers"]; + const lineNumbers = lineNumbersFlag !== "false"; + + // Highlighting + const highlightLines = parseLineHighlights(props["data-highlight-lines"]); + const highlightStrings = parseSubstringHighlights( + props["data-highlight-strings"], + ); + + // Determine component type based on language and attributes + const isShell = SHELL_LANGUAGES.has(language); + const isConfigFile = CONFIG_LANGUAGES.has(language); + const isAnsi = language === "ansi"; + + // ANSI blocks render as plain text with ANSI escape code handling + if (isAnsi) { + return ( +
    + +
    + ); + } + + // Routing logic: + // 1. Config files → Always static CodeSnippet (never animated unless explicit) + // 2. Explicit animate flag → Use CodeEditorWrapper + // 3. Explicit animate=false → Use CodeSnippet + // 4. Shell + filename + copy=false → Animated CodeEditorWrapper (terminal style) + // 5. Shell (all other cases) → ShellSnippet (copyable Terminal tab UI) + // 6. Non-shell + filename + no copy attr + no animate=false → Animated CodeEditorWrapper + // 7. Default → Static CodeSnippet + + // Config files use static CodeSnippet unless explicitly animated + if (isConfigFile && !shouldAnimate) { + return ( +
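The seven routing rules listed above are easier to scan as one decision function. The sketch below is an illustrative restatement, not the exported API of this file: the ANSI branch is handled separately before these rules, the renderer names match the client components named in the comments (CodeSnippet, CodeEditorWrapper, ShellSnippet), and the flat input shape is an assumption — the real component reads these values from `data-*` props.

```ts
type Renderer =
  | "CodeSnippet" // static snippet
  | "CodeEditorWrapper" // animated typing effect
  | "ShellSnippet"; // Terminal tab with copy button

interface RoutingInput {
  isConfigFile: boolean;
  isShell: boolean;
  filename?: string;
  copyAttr?: string; // raw value of data-copy, if present
  animateAttr?: string; // raw value of data-animate, if present
}

function pickRenderer(input: RoutingInput): Renderer {
  const shouldAnimate = input.animateAttr === "true";
  const shouldNotAnimate = input.animateAttr === "false";

  // 1. Config files stay static unless explicitly animated
  if (input.isConfigFile && !shouldAnimate) return "CodeSnippet";
  // 2. Explicit animate flag wins
  if (shouldAnimate) return "CodeEditorWrapper";
  if (input.isShell) {
    // 4. Shell + filename + copy=false → animated terminal
    if (input.filename && input.copyAttr === "false" && !shouldNotAnimate) {
      return "CodeEditorWrapper";
    }
    // 5. All other shell commands → Terminal tab with copy
    return "ShellSnippet";
  }
  // 6. Non-shell + filename + no copy attr (and not animate=false) → animated
  if (input.filename && input.copyAttr === undefined && !shouldNotAnimate) {
    return "CodeEditorWrapper";
  }
  // 3 & 7. Everything else → static snippet
  return "CodeSnippet";
}
```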
    + +
    + ); + } + + // Explicit animate flag + if (shouldAnimate) { + return ( +
    + +
    + ); + } + + // Shell commands: Use animated terminal only when explicitly copy=false with filename + // and animate flag is not explicitly false + // Otherwise, always use ShellSnippet (the Terminal tab UI with copy button) + if (isShell) { + // Only use animated terminal when explicitly no copy button wanted + if (filename && props["data-copy"] === "false" && !shouldNotAnimate) { + return ( +
    + +
    + ); + } + + // All other shell commands use ShellSnippet (Terminal tab with copy) + return ( +
    + +
    + ); + } + + // Non-shell: animate if filename present and copy not explicitly set + // unless animate is explicitly false + const legacyAnimate = + filename && props["data-copy"] === undefined && !shouldNotAnimate; + + if (legacyAnimate) { + return ( +
    + +
    + ); + } + + // Default: static CodeSnippet + return ( +
    + +
    + ); +} + +/** + * Server-side inline code component + * + * Supports Nextra-style inline highlighting: `code{:lang}` + */ +export function ServerInlineCode({ + children, + className, + ...props +}: React.HTMLAttributes): React.ReactElement { + const isCodeBlock = + className?.includes("language-") || + (props as Record)["data-language"]; + + if (isCodeBlock) { + // This is a code block that should be handled by ServerCodeBlock + // This is a fallback for when code is not wrapped in pre + const language = getLanguage(props as ServerCodeBlockProps); + const codeText = extractTextContent(children).trim(); + + return ( +
    + +
    + ); + } + + // Check for inline code with language hint: `code{:lang}` + const textContent = + typeof children === "string" ? children : extractTextContent(children); + const inlineLangMatch = textContent.match(/^(.+)\{:(\w+)\}$/); + + if (inlineLangMatch) { + const [, code, lang] = inlineLangMatch; + if (code && lang) { + return ; + } + } + + // Inline code - simple styled element + return ( + + {children} + + ); +} diff --git a/apps/framework-docs-v2/src/components/mdx/server-figure.tsx b/apps/framework-docs-v2/src/components/mdx/server-figure.tsx new file mode 100644 index 0000000000..d8b2bf90c8 --- /dev/null +++ b/apps/framework-docs-v2/src/components/mdx/server-figure.tsx @@ -0,0 +1,125 @@ +import React from "react"; + +interface MDXFigureProps extends React.HTMLAttributes { + "data-rehype-pretty-code-figure"?: string; + children?: React.ReactNode; +} + +/** + * Extracts text content from a React node (for figcaption titles) + */ +function extractTextFromNode(node: React.ReactNode): string { + if (typeof node === "string") { + return node; + } + if (typeof node === "number") { + return String(node); + } + if (Array.isArray(node)) { + return node.map(extractTextFromNode).join(""); + } + if (React.isValidElement(node)) { + const props = node.props as Record; + return extractTextFromNode(props.children as React.ReactNode); + } + return ""; +} + +/** + * Server-side component that handles figure wrapper from rehype-pretty-code + * Extracts the title from figcaption and passes it to the pre element + */ +export function ServerFigure({ + children, + ...props +}: MDXFigureProps): React.ReactElement { + // Only handle code block figures + // data-rehype-pretty-code-figure is present (even if empty string) for code blocks + if (props["data-rehype-pretty-code-figure"] === undefined) { + return
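The inline `code{:lang}` handling in ServerInlineCode above hinges on a single regex; a quick standalone check of what it accepts and rejects, using the same pattern:

```ts
// Same pattern as above for Nextra-style inline language hints.
const inlineLang = /^(.+)\{:(\w+)\}$/;

// Matches: the code text and the language are captured separately.
console.log("useState(0){:ts}".match(inlineLang)?.slice(1)); // ["useState(0)", "ts"]
console.log("moose dev{:bash}".match(inlineLang)?.slice(1)); // ["moose dev", "bash"]

// No hint (or a malformed one) falls through to the plain inline styling.
console.log("plain code".match(inlineLang)); // null
console.log("broken{:ts".match(inlineLang)); // null
```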
    {children}
    ; + } + + // For code blocks, extract figcaption title and pass to pre + const childrenArray = React.Children.toArray(children); + + // Find figcaption and pre elements + let figcaption: React.ReactElement | null = null; + let preElement: React.ReactElement | null = null; + + childrenArray.forEach((child) => { + if (React.isValidElement(child)) { + const childType = child.type; + const childProps = (child.props as Record) || {}; + + // Check if it's a native HTML element by checking if type is a string + if (typeof childType === "string") { + if (childType === "figcaption") { + figcaption = child; + } else if (childType === "pre") { + preElement = child; + } + } else { + // For React components (like ServerCodeBlock) + // Check if it has code block attributes + const hasCodeBlockAttrs = + childProps["data-rehype-pretty-code-fragment"] !== undefined || + childProps["data-language"] !== undefined || + childProps["data-theme"] !== undefined; + + // If it has code block attributes, it's the pre element + if (hasCodeBlockAttrs || !preElement) { + preElement = child; + } + } + } + }); + + // Extract filename from figcaption (title from markdown) + let figcaptionTitle: string | undefined; + if (figcaption !== null) { + const figcaptionProps = (figcaption as React.ReactElement).props as Record< + string, + unknown + >; + figcaptionTitle = extractTextFromNode( + figcaptionProps.children as React.ReactNode, + ).trim(); + } + + const preProps = + preElement ? + ((preElement as React.ReactElement).props as Record) || + {} + : {}; + + // Prioritize figcaption title (from markdown title="...") over any existing attributes + const filename = + figcaptionTitle || + (preProps["data-rehype-pretty-code-title"] as string | undefined) || + (preProps["data-filename"] as string | undefined); + + // If we have a pre element, ensure the filename is set on both attributes + if (preElement) { + const hasCodeBlockAttrs = + preProps["data-language"] !== undefined || + preProps["data-theme"] !== undefined; + const fragmentValue = + preProps["data-rehype-pretty-code-fragment"] !== undefined ? + preProps["data-rehype-pretty-code-fragment"] + : hasCodeBlockAttrs ? "" + : undefined; + + const updatedPre = React.cloneElement(preElement, { + ...preProps, + "data-filename": filename || undefined, + "data-rehype-pretty-code-title": filename || undefined, + ...(fragmentValue !== undefined ? 
+ { "data-rehype-pretty-code-fragment": fragmentValue } + : {}), + }); + return <>{updatedPre}; + } + + // Fallback: render children + return <>{children}; +} diff --git a/apps/framework-docs-v2/src/components/mdx/shell-snippet.tsx b/apps/framework-docs-v2/src/components/mdx/shell-snippet.tsx new file mode 100644 index 0000000000..1ad348c608 --- /dev/null +++ b/apps/framework-docs-v2/src/components/mdx/shell-snippet.tsx @@ -0,0 +1,36 @@ +"use client"; + +import React from "react"; +import { + Snippet, + SnippetCopyButton, + SnippetHeader, + SnippetTabsContent, + SnippetTabsList, + SnippetTabsTrigger, +} from "@/components/ui/snippet"; + +interface ShellSnippetProps { + code: string; + language: string; +} + +/** + * Client component for shell/terminal code snippets + * Displays with "Terminal" label and copy button + */ +export function ShellSnippet({ code, language }: ShellSnippetProps) { + const [value, setValue] = React.useState("terminal"); + + return ( + + + + Terminal + + + + {code} + + ); +} diff --git a/apps/framework-docs-v2/src/components/mdx/template-card.tsx b/apps/framework-docs-v2/src/components/mdx/template-card.tsx index 0e3ccba497..c229f41f38 100644 --- a/apps/framework-docs-v2/src/components/mdx/template-card.tsx +++ b/apps/framework-docs-v2/src/components/mdx/template-card.tsx @@ -11,7 +11,8 @@ import { CardFooter, CardHeader, } from "@/components/ui/card"; -import { IconBrandGithub } from "@tabler/icons-react"; +import { IconBrandGithub, IconRocket, IconBook } from "@tabler/icons-react"; +import { Button } from "@/components/ui/button"; import { Snippet, SnippetCopyButton, @@ -51,12 +52,7 @@ export function TemplateCard({ item, className }: TemplateCardProps) { const isTemplate = item.type === "template"; const template = isTemplate ? (item as TemplateMetadata) : null; const app = !isTemplate ? (item as AppMetadata) : null; - - const categoryColors = { - starter: "border-blue-200 dark:border-blue-800", - framework: "border-purple-200 dark:border-purple-800", - example: "border-green-200 dark:border-green-800", - }; + const [chipsExpanded, setChipsExpanded] = React.useState(false); const categoryLabels = { starter: "Starter", @@ -78,103 +74,120 @@ export function TemplateCard({ item, className }: TemplateCardProps) { const description = isTemplate ? template!.description : app!.description; const name = isTemplate ? template!.name : app!.name; + // Combine frameworks and features into a single array with type info + const allChips = [ + ...frameworks.map((f) => ({ value: f, type: "framework" as const })), + ...features.map((f) => ({ value: f, type: "feature" as const })), + ]; + + const MAX_VISIBLE_CHIPS = 3; + const visibleChips = + chipsExpanded ? allChips : allChips.slice(0, MAX_VISIBLE_CHIPS); + const hiddenCount = allChips.length - MAX_VISIBLE_CHIPS; + return ( - -
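Most of ServerFigure above is title plumbing: the `title="..."` an author writes on a fence becomes a figcaption via rehype-pretty-code, and the component copies it onto the pre element's data attributes. A minimal sketch of that precedence, with prop shapes simplified for illustration:

```ts
// Illustrative only: the order ServerFigure uses when deciding which filename
// to forward to the pre element.
interface PreProps {
  "data-rehype-pretty-code-title"?: string;
  "data-filename"?: string;
}

function resolveFilename(
  figcaptionText: string | undefined,
  preProps: PreProps,
): string | undefined {
  // 1. The figcaption rendered from the markdown title="..." wins,
  // 2. then any title rehype-pretty-code already put on the pre,
  // 3. then an explicit data-filename attribute.
  return (
    (figcaptionText?.trim() || undefined) ??
    preProps["data-rehype-pretty-code-title"] ??
    preProps["data-filename"]
  );
}

console.log(resolveFilename("app/index.ts", {})); // "app/index.ts"
console.log(
  resolveFilename(undefined, { "data-filename": "moose.config.toml" }),
); // "moose.config.toml"
```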
    -
    -
    - {language && ( - - {language === "typescript" ? "TS" : "Python"} - - )} - {isTemplate && template && ( - - {categoryLabels[template.category]} - - )} - {!isTemplate && ( - - Demo App - - )} -
    -

    - {isTemplate ? formatTemplateName(name) : name} -

    -
    + +
    + {(() => { + const labels: string[] = []; + if (language) { + labels.push(language === "typescript" ? "TypeScript" : "Python"); + } + if (isTemplate && template) { + labels.push(categoryLabels[template.category]); + } + if (!isTemplate) { + labels.push("Demo App"); + } + return ( + + {labels.join(" • ")} + + ); + })()}
    -
    - - {description} - - {frameworks.length > 0 && ( -
    -

    - Frameworks: -

    -
    - {frameworks.map((framework) => ( - - {framework} - - ))} -
    +

    + {isTemplate ? formatTemplateName(name) : name} +

    + {allChips.length > 0 && ( +
    + {visibleChips.map((chip) => ( + + {chip.value} + + ))} + {!chipsExpanded && hiddenCount > 0 && ( + setChipsExpanded(true)} + > + {hiddenCount} more + + )} + {chipsExpanded && ( + setChipsExpanded(false)} + > + Show less + + )}
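The chip row above folds frameworks and features into one list capped at three entries, with a "+N more" toggle. A standalone sketch of that slicing logic, using assumed template metadata, shows how the visible set and the hidden count relate:

```ts
// Illustrative re-statement of the chip-collapsing logic in TemplateCard.
const MAX_VISIBLE_CHIPS = 3;

interface Chip {
  value: string;
  type: "framework" | "feature";
}

function chipRow(allChips: Chip[], expanded: boolean) {
  const chips = expanded ? allChips : allChips.slice(0, MAX_VISIBLE_CHIPS);
  const hiddenCount = allChips.length - MAX_VISIBLE_CHIPS;
  return { chips, showMoreButton: !expanded && hiddenCount > 0, hiddenCount };
}

// Assumed metadata: 2 frameworks + 3 features = 5 chips
const chips: Chip[] = [
  { value: "Next.js", type: "framework" },
  { value: "FastAPI", type: "framework" },
  { value: "Streaming", type: "feature" },
  { value: "Workflows", type: "feature" },
  { value: "OLAP", type: "feature" },
];

console.log(chipRow(chips, false)); // 3 chips shown, "+2 more" button
console.log(chipRow(chips, true)); // all 5 chips, "Show less" instead
```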
    )} - - {features.length > 0 && ( -
    -

    - Features: -

    -
    - {features.map((feature) => ( - - {feature} - - ))} -
    -
    - )} - - + + + {description} {isTemplate && template && (
    )} - {!isTemplate && app && app.blogPost && ( - - Read Blog Post → - - )} - - - View on GitHub - +
    + +
    + + {!isTemplate && app && app.blogPost && ( + + )} + +
    ); diff --git a/apps/framework-docs-v2/src/components/mdx/template-grid.tsx b/apps/framework-docs-v2/src/components/mdx/template-grid.tsx index 753ec43fa1..23eb147392 100644 --- a/apps/framework-docs-v2/src/components/mdx/template-grid.tsx +++ b/apps/framework-docs-v2/src/components/mdx/template-grid.tsx @@ -1,11 +1,11 @@ "use client"; import * as React from "react"; +import { useSearchParams } from "next/navigation"; import { cn } from "@/lib/utils"; import { Input } from "@/components/ui/input"; import { Button } from "@/components/ui/button"; import { Badge } from "@/components/ui/badge"; -import { ToggleGroup, ToggleGroupItem } from "@/components/ui/toggle-group"; import { TemplateCard } from "./template-card"; import type { ItemMetadata, TemplateMetadata } from "@/lib/template-types"; import { IconSearch, IconX } from "@tabler/icons-react"; @@ -20,13 +20,33 @@ type CategoryFilter = ("starter" | "framework" | "example")[]; type TypeFilter = "template" | "app" | null; export function TemplateGrid({ items, className }: TemplateGridProps) { + const searchParams = useSearchParams(); const [searchQuery, setSearchQuery] = React.useState(""); - const [languageFilter, setLanguageFilter] = - React.useState(null); - const [categoryFilter, setCategoryFilter] = React.useState( - [], - ); - const [typeFilter, setTypeFilter] = React.useState(null); + + // Read filters from URL params (set by TemplatesSideNav) + const typeFilter = React.useMemo(() => { + const type = searchParams.get("type"); + return (type === "template" || type === "app" ? type : null) as TypeFilter; + }, [searchParams]); + + const languageFilter = React.useMemo(() => { + const language = searchParams.get("language"); + return ( + language === "typescript" || language === "python" ? + language + : null) as LanguageFilter; + }, [searchParams]); + + const categoryFilter = React.useMemo(() => { + const categoryParam = searchParams.get("category"); + if (!categoryParam) return []; + return categoryParam + .split(",") + .filter( + (c): c is "starter" | "framework" | "example" => + c === "starter" || c === "framework" || c === "example", + ) as CategoryFilter; + }, [searchParams]); const filteredItems = React.useMemo(() => { return items.filter((item) => { @@ -88,18 +108,10 @@ export function TemplateGrid({ items, className }: TemplateGridProps) { categoryFilter.length > 0 || typeFilter !== null; - const clearFilters = () => { - setSearchQuery(""); - setLanguageFilter(null); - setCategoryFilter([]); - setTypeFilter(null); - }; - return (
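TemplateGrid above now derives its filters from URL search params (set elsewhere by the side nav) instead of local toggle state. A standalone sketch of the parsing guards follows; `URLSearchParams` stands in for the `useSearchParams()` result, and the accepted values mirror the checks above:

```ts
type LanguageFilter = "typescript" | "python" | null;
type CategoryFilter = ("starter" | "framework" | "example")[];
type TypeFilter = "template" | "app" | null;

function parseFilters(params: URLSearchParams): {
  type: TypeFilter;
  language: LanguageFilter;
  category: CategoryFilter;
} {
  const type = params.get("type");
  const language = params.get("language");
  const category = (params.get("category") ?? "")
    .split(",")
    .filter(
      (c): c is CategoryFilter[number] =>
        c === "starter" || c === "framework" || c === "example",
    );

  return {
    type: type === "template" || type === "app" ? type : null,
    language:
      language === "typescript" || language === "python" ? language : null,
    category,
  };
}

// e.g. /templates?type=template&language=python&category=starter,example
console.log(
  parseFilters(
    new URLSearchParams("type=template&language=python&category=starter,example"),
  ),
); // { type: "template", language: "python", category: ["starter", "example"] }
```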
    - {/* Filters */} -
    - {/* Search */} + {/* Search - kept in main content area */} +
    )}
    - - {/* Type Filter */} -
    - - { - if (value === "" || value === undefined) { - setTypeFilter(null); - } else if (value === "template" || value === "app") { - setTypeFilter(value as TypeFilter); - } - }} - variant="outline" - className="w-full" - > - - Templates - - - Apps - - -
    - - {/* Language and Category Filters */} -
    -
    - - { - if (value === "" || value === undefined) { - setLanguageFilter(null); - } else if (value === "typescript" || value === "python") { - setLanguageFilter(value as LanguageFilter); - } - }} - variant="outline" - className="w-full" - > - - TypeScript - - - Python - - -
    - -
    - - { - setCategoryFilter(value as CategoryFilter); - }} - variant="outline" - className="w-full" - > - - Starter - - - Framework - - - Example - - -
    -
    - - {/* Clear filters button */} + {/* Results count */} {hasActiveFilters && ( -
    - +
    {filteredItems.length} item{filteredItems.length !== 1 ? "s" : ""} diff --git a/apps/framework-docs-v2/src/components/navigation/side-nav.tsx b/apps/framework-docs-v2/src/components/navigation/side-nav.tsx index e9ac6690fa..11ce273d6c 100644 --- a/apps/framework-docs-v2/src/components/navigation/side-nav.tsx +++ b/apps/framework-docs-v2/src/components/navigation/side-nav.tsx @@ -17,6 +17,7 @@ import { SidebarMenuSub, SidebarMenuSubButton, SidebarMenuSubItem, + SidebarMenuSubLabel, } from "@/components/ui/sidebar"; import { Collapsible, @@ -155,18 +156,42 @@ function NavItemComponent({ item }: { item: NavPage }) { const isActive = pathname === `/${item.slug}`; const hasChildren = item.children && item.children.length > 0; - const hasActiveChild = - hasChildren && - item.children?.some( - (child) => child.type === "page" && pathname === `/${child.slug}`, - ); - const defaultOpen = isActive || hasActiveChild; + + // Recursively check if any descendant is active + const hasActiveDescendant = React.useMemo(() => { + if (!hasChildren) return false; + + const checkDescendant = (children: NavItem[]): boolean => { + return children.some((child) => { + if (child.type === "page") { + if (pathname === `/${child.slug}`) return true; + if (child.children) return checkDescendant(child.children); + } + return false; + }); + }; + + return checkDescendant(item.children!); + }, [hasChildren, item.children, pathname]); + + const defaultOpen = isActive || hasActiveDescendant; + const [isOpen, setIsOpen] = React.useState(defaultOpen); + + // Update open state when active state changes + React.useEffect(() => { + setIsOpen(isActive || hasActiveDescendant); + }, [isActive, hasActiveDescendant]); if (hasChildren) { return ( - + - + {item.icon && } {item.title} @@ -183,67 +208,12 @@ function NavItemComponent({ item }: { item: NavPage }) { - {(() => { - const elements: React.ReactNode[] = []; - let currentGroup: NavPage[] = []; - let currentLabel: string | null = null; - - const flushGroup = () => { - if (currentGroup.length > 0) { - currentGroup.forEach((child: NavPage) => { - const childHref = (() => { - const params = new URLSearchParams( - searchParams.toString(), - ); - params.set("lang", language); - return `/${child.slug}?${params.toString()}`; - })(); - const childIsActive = pathname === `/${child.slug}`; - elements.push( - - - - {child.icon && ( - - )} - {child.title} - - - , - ); - }); - currentGroup = []; - } - }; - - item.children?.forEach((child) => { - if (child.type === "separator") { - flushGroup(); - currentLabel = null; - } else if (child.type === "label") { - flushGroup(); - currentLabel = child.title; - } else if (child.type === "page") { - if (currentLabel && currentGroup.length === 0) { - // Add label before first item in group - elements.push( - - {currentLabel} - , - ); - } - currentGroup.push(child); - } - }); - flushGroup(); - return elements; - })()} + {renderNavChildren( + item.children, + pathname, + searchParams, + language, + )} @@ -265,3 +235,124 @@ function NavItemComponent({ item }: { item: NavPage }) { ); } + +function NestedNavItemComponent({ + item, + pathname, + searchParams, + language, +}: { + item: NavPage; + pathname: string; + searchParams: URLSearchParams; + language: string; +}) { + const childHasChildren = item.children && item.children.length > 0; + const childHref = (() => { + const params = new URLSearchParams(searchParams.toString()); + params.set("lang", language); + return `/${item.slug}?${params.toString()}`; + })(); + const childIsActive = pathname === 
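The sidebar change above replaces the one-level "active child" check with a recursive "active descendant" walk so deeply nested sections open on load. A standalone sketch of that walk, with the NavItem shape reduced to the fields the check reads and a hypothetical nav fixture:

```ts
// Reduced shape for the sketch; the real NavItem carries titles, icons, etc.
interface NavItem {
  type: "page" | "separator" | "label";
  slug?: string;
  children?: NavItem[];
}

function hasActiveDescendant(children: NavItem[], pathname: string): boolean {
  return children.some((child) => {
    if (child.type !== "page") return false;
    if (pathname === `/${child.slug}`) return true;
    return child.children ?
        hasActiveDescendant(child.children, pathname)
      : false;
  });
}

// Assumed fixture: a page nested two levels deep keeps both ancestors open.
const nav: NavItem[] = [
  {
    type: "page",
    slug: "moosestack",
    children: [
      {
        type: "page",
        slug: "moosestack/olap",
        children: [{ type: "page", slug: "moosestack/olap/schemas" }],
      },
    ],
  },
];

console.log(hasActiveDescendant(nav, "/moosestack/olap/schemas")); // true
console.log(hasActiveDescendant(nav, "/templates")); // false
```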
`/${item.slug}`; + + // Recursively check if any descendant is active + const checkDescendant = (children: NavItem[]): boolean => { + return children.some((c) => { + if (c.type === "page") { + if (pathname === `/${c.slug}`) return true; + if (c.children) return checkDescendant(c.children); + } + return false; + }); + }; + const hasActiveDescendant = + childHasChildren ? checkDescendant(item.children!) : false; + const defaultOpen = childIsActive || hasActiveDescendant; + const [isOpen, setIsOpen] = React.useState(defaultOpen); + + React.useEffect(() => { + setIsOpen(childIsActive || hasActiveDescendant); + }, [childIsActive, hasActiveDescendant]); + + if (childHasChildren) { + return ( + + + + + {item.icon && } + {item.title} + + + + + + Toggle + + + + + {renderNavChildren( + item.children!, + pathname, + searchParams, + language, + )} + + + + + ); + } + + return ( + + + + {item.icon && } + {item.title} + + + + ); +} + +function renderNavChildren( + children: NavItem[], + pathname: string, + searchParams: URLSearchParams, + language: string, +): React.ReactNode[] { + const elements: React.ReactNode[] = []; + let isFirstLabel = true; + + children.forEach((child, index) => { + if (child.type === "label") { + elements.push( + + {child.title} + , + ); + isFirstLabel = false; + } else if (child.type === "separator") { + // Separators are handled via label spacing - skip rendering them + return; + } else if (child.type === "page") { + elements.push( + , + ); + } + }); + + return elements; +} diff --git a/apps/framework-docs-v2/src/components/navigation/toc-nav.tsx b/apps/framework-docs-v2/src/components/navigation/toc-nav.tsx index 0be4c2b7f6..dabd27899c 100644 --- a/apps/framework-docs-v2/src/components/navigation/toc-nav.tsx +++ b/apps/framework-docs-v2/src/components/navigation/toc-nav.tsx @@ -1,9 +1,29 @@ "use client"; import { useEffect, useState } from "react"; +import { usePathname } from "next/navigation"; import { cn } from "@/lib/utils"; import type { Heading } from "@/lib/content-types"; -import { IconExternalLink } from "@tabler/icons-react"; +import { + IconExternalLink, + IconPlus, + IconInfoCircle, +} from "@tabler/icons-react"; +import { Button } from "@/components/ui/button"; +import { + Select, + SelectContent, + SelectItem, + SelectTrigger, + SelectValue, +} from "@/components/ui/select"; +import { + Tooltip, + TooltipContent, + TooltipProvider, + TooltipTrigger, +} from "@/components/ui/tooltip"; +import { Label } from "../ui/label"; interface TOCNavProps { headings: Heading[]; @@ -15,6 +35,10 @@ interface TOCNavProps { export function TOCNav({ headings, helpfulLinks }: TOCNavProps) { const [activeId, setActiveId] = useState(""); + const [scope, setScope] = useState<"initiative" | "project">("initiative"); + const pathname = usePathname(); + const isGuidePage = + pathname?.startsWith("/guides/") && pathname !== "/guides"; useEffect(() => { if (headings.length === 0) return; @@ -123,15 +147,15 @@ export function TOCNav({ headings, helpfulLinks }: TOCNavProps) { } return ( -
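To make the renderNavChildren behavior above easier to follow in isolation: labels become inline group headings, separators are dropped (label spacing stands in for them), and pages become nested menu items. The sketch below flattens that into a plain render plan; the types are reduced for illustration, and the assumption that `isFirstLabel` only affects the spacing of later group headings is an inference, since the actual class names are not shown here.

```ts
type NavChild =
  | { type: "label"; title: string }
  | { type: "separator" }
  | { type: "page"; slug: string; title: string };

type RenderPlanEntry =
  | { kind: "group-label"; title: string; first: boolean }
  | { kind: "item"; slug: string; title: string };

function planNavChildren(children: NavChild[]): RenderPlanEntry[] {
  const plan: RenderPlanEntry[] = [];
  let isFirstLabel = true;

  for (const child of children) {
    if (child.type === "label") {
      plan.push({ kind: "group-label", title: child.title, first: isFirstLabel });
      isFirstLabel = false;
    } else if (child.type === "page") {
      plan.push({ kind: "item", slug: child.slug, title: child.title });
    }
    // separators: intentionally skipped
  }
  return plan;
}

// Assumed children for a hypothetical section:
console.log(
  planNavChildren([
    { type: "label", title: "Core" },
    { type: "page", slug: "olap", title: "OLAP Tables" },
    { type: "separator" },
    { type: "label", title: "Advanced" },
    { type: "page", slug: "workflows", title: "Workflows" },
  ]),
);
// → "Core" (first label), "OLAP Tables", "Advanced" (later label), "Workflows"
```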