diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 9b7fa0951b6f1..ced3f0fb7bc07 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -3,8 +3,8 @@ .github/workflows/regression.yml @vectordotdev/vector @vectordotdev/single-machine-performance regression/config.yaml @vectordotdev/vector @vectordotdev/single-machine-performance -docs/ @vectordotdev/vector @vectordotdev/ux-team @vectordotdev/documentation -website/ @vectordotdev/vector @vectordotdev/ux-team +docs/ @vectordotdev/vector @vectordotdev/documentation +website/ @vectordotdev/vector website/content @vectordotdev/vector @vectordotdev/documentation website/cue/reference @vectordotdev/vector @vectordotdev/documentation diff --git a/.github/ISSUE_TEMPLATE/minor-release.md b/.github/ISSUE_TEMPLATE/minor-release.md index bcecf75087bc9..105299559632b 100644 --- a/.github/ISSUE_TEMPLATE/minor-release.md +++ b/.github/ISSUE_TEMPLATE/minor-release.md @@ -37,23 +37,23 @@ cargo vdev release prepare --version "${NEW_VECTOR_VERSION}" --vrl-version "${NE ``` Automated steps include: -- [ ] Create a new release branch from master to freeze commits +- Create a new release branch from master to freeze commits - `git fetch && git checkout origin/master && git checkout -b "${RELEASE_BRANCH}" && git push -u` -- [ ] Create a new release preparation branch from `master` +- Create a new release preparation branch from `master` - `git checkout -b "${PREP_BRANCH}" && git push -u` -- [ ] Pin VRL to latest released version rather than `main` -- [ ] Check if there is a newer version of [Alpine](https://alpinelinux.org/releases/) or [Debian](https://www.debian.org/releases/) available to update the release images in +- Pin VRL to latest released version rather than `main` +- Check if there is a newer version of [Alpine](https://alpinelinux.org/releases/) or [Debian](https://www.debian.org/releases/) available to update the release images in `distribution/docker/`. Update if so. 
-- [ ] Run `cargo vdev build release-cue` to generate a new cue file for the release - - [ ] Copy VRL changelogs from the VRL version in the last Vector release as a new changelog entry +- Run `cargo vdev build release-cue` to generate a new cue file for the release + - Copy VRL changelogs from the VRL version in the last Vector release as a new changelog entry ([example](https://github.com/vectordotdev/vector/blob/9c67bba358195f5018febca2f228dfcb2be794b5/website/cue/reference/releases/0.41.0.cue#L33-L64)) -- [ ] Update version number in `website/cue/reference/administration/interfaces/kubectl.cue` -- [ ] Update version number in `distribution/install.sh` -- [ ] Add new version to `website/cue/reference/versions.cue` -- [ ] Create new release md file by copying an existing one in `./website/content/en/releases/` and +- Update version number in `website/cue/reference/administration/interfaces/kubectl.cue` +- Update version number in `distribution/install.sh` +- Add new version to `website/cue/reference/versions.cue` +- Create new release md file by copying an existing one in `./website/content/en/releases/` and updating version number -- [ ] Commit these changes -- [ ] Open PR against the release branch (`"${RELEASE_BRANCH}"`) for review +- Commit these changes +- Open PR against the release branch (`"${RELEASE_BRANCH}"`) for review ## 3. Manual Steps @@ -94,7 +94,7 @@ Automated steps include: - [ ] Release updated Helm chart. See [releasing Helm chart](https://github.com/vectordotdev/helm-charts/blob/develop/RELEASING.md). - [ ] Release Homebrew. Refer to the internal releasing doc. - [ ] Create internal Docker images. Refer to the internal releasing doc. -- [ ] Update the latest [release tag](https://github.com/vectordotdev/vector/release) description with the release announcement. +- [ ] Update the latest [release tag](https://github.com/vectordotdev/vector/releases) description with the release announcement. 
- [ ] Create a new PR with title starting as `chore(releasing):` - [ ] Cherry-pick any release commits from the release branch that are not on `master`, to `master`. - [ ] Run `cargo vdev build manifests` and commit changes. diff --git a/.github/actions/install-vdev/action.yml b/.github/actions/install-vdev/action.yml index 7b35d99d5a253..da04417f67358 100644 --- a/.github/actions/install-vdev/action.yml +++ b/.github/actions/install-vdev/action.yml @@ -20,7 +20,8 @@ runs: uses: actions/cache@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 # v5.0.3 with: path: ~/.cargo/bin/vdev - key: ${{ runner.os }}-vdev-${{ hashFiles('vdev/**', 'Cargo.toml', 'Cargo.lock') }} + # WARNING: this key needs to be in sync with the key in .github/actions/setup/action.yml + key: ${{ runner.os }}-vdev-${{ hashFiles('vdev/**', 'lib/vector-vrl/**', 'Cargo.toml', 'Cargo.lock') }} restore-keys: | ${{ runner.os }}-vdev- @@ -36,7 +37,8 @@ runs: uses: actions/cache/save@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 # v5.0.3 with: path: ~/.cargo/bin/vdev - key: ${{ runner.os }}-vdev-${{ hashFiles('vdev/**', 'Cargo.toml', 'Cargo.lock') }} + # WARNING: this key needs to be in sync with the key in .github/actions/setup/action.yml + key: ${{ runner.os }}-vdev-${{ hashFiles('vdev/**', 'lib/vector-vrl/**', 'Cargo.toml', 'Cargo.lock') }} - name: Set VDEV environment variable shell: bash diff --git a/.github/actions/setup/action.yml b/.github/actions/setup/action.yml index 99067675b8251..8dbb5416de92f 100644 --- a/.github/actions/setup/action.yml +++ b/.github/actions/setup/action.yml @@ -90,7 +90,8 @@ runs: uses: actions/cache@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 # v5.0.3 with: path: ~/.cargo/bin/vdev - key: ${{ runner.os }}-vdev-${{ hashFiles('vdev/**', 'Cargo.toml', 'Cargo.lock') }} + # WARNING: this key needs to be in sync with the key in .github/actions/install-vdev/action.yml + key: ${{ runner.os }}-vdev-${{ hashFiles('vdev/**', 'lib/vector-vrl/**', 'Cargo.toml', 'Cargo.lock') }} restore-keys: | ${{ 
runner.os }}-vdev- lookup-only: true @@ -211,7 +212,7 @@ runs: EOF - name: Install protoc - if: ${{ inputs.protoc == 'true' }} + if: ${{ inputs.protoc == 'true' || env.VDEV_NEEDS_COMPILE == 'true' }} shell: bash run: | echo "Installing protoc" diff --git a/.github/actions/spelling/allow.txt b/.github/actions/spelling/allow.txt index 81976d0cace2c..b82a12c395bd2 100644 --- a/.github/actions/spelling/allow.txt +++ b/.github/actions/spelling/allow.txt @@ -137,6 +137,7 @@ dsmith DVB ede emoji +emojis emqx enableable Enot @@ -145,7 +146,9 @@ EPC esbuild esensar etld +eventcreate eventloop +Evt Evercoss exactlyonce Explay @@ -497,6 +500,7 @@ Telstra Tencent Texet Thl +timediff timeframe timeseries timespan @@ -558,3 +562,5 @@ ZTE Zync sighup CLAUDE +linting +lexers diff --git a/.github/workflows/build-test-runner.yml b/.github/workflows/build-test-runner.yml index 82e247c6f6f96..50a46fd4bd30f 100644 --- a/.github/workflows/build-test-runner.yml +++ b/.github/workflows/build-test-runner.yml @@ -18,11 +18,14 @@ on: type: string permissions: - packages: write + contents: read jobs: build: runs-on: ubuntu-24.04 + permissions: + contents: read # Required by actions/checkout + packages: write # Required to push test runner image to GitHub Container Registry steps: - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: diff --git a/.github/workflows/build_preview_sites.yml b/.github/workflows/build_preview_sites.yml index aa802412e0e7c..9df10ce781379 100644 --- a/.github/workflows/build_preview_sites.yml +++ b/.github/workflows/build_preview_sites.yml @@ -7,14 +7,17 @@ on: - completed permissions: - actions: read - issues: write - pull-requests: write - statuses: write + contents: read # Restrictive default jobs: deploy_vector_preview_site: if: ${{ github.event.workflow_run.conclusion == 'success' && contains(github.event.workflow_run.head_branch, 'website') }} + permissions: + contents: read # Required by the reusable workflow + actions: read # 
Required to download artifacts + issues: write # Required to post preview link comments + pull-requests: write # Required to post preview link comments + statuses: write # Required to update commit status uses: ./.github/workflows/create_preview_sites.yml with: APP_ID: "d1a7j77663uxsc" @@ -26,6 +29,12 @@ jobs: deploy_rust_doc_preview_site: if: ${{ github.event.workflow_run.conclusion == 'success' && contains(github.event.workflow_run.head_branch, 'website') }} + permissions: + contents: read # Required by the reusable workflow + actions: read # Required to download artifacts + issues: write # Required to post preview link comments + pull-requests: write # Required to post preview link comments + statuses: write # Required to update commit status uses: ./.github/workflows/create_preview_sites.yml with: APP_ID: "d1hoyoksbulg25" @@ -37,6 +46,12 @@ jobs: deploy_vrl_playground_preview_site: if: ${{ github.event.workflow_run.conclusion == 'success' && contains(github.event.workflow_run.head_branch, 'website') }} + permissions: + contents: read # Required by the reusable workflow + actions: read # Required to download artifacts + issues: write # Required to post preview link comments + pull-requests: write # Required to post preview link comments + statuses: write # Required to update commit status uses: ./.github/workflows/create_preview_sites.yml with: APP_ID: "d2lr4eds605rpz" diff --git a/.github/workflows/changelog.yaml b/.github/workflows/changelog.yaml index e1d6beffdbf84..188a41d368e2b 100644 --- a/.github/workflows/changelog.yaml +++ b/.github/workflows/changelog.yaml @@ -9,7 +9,7 @@ name: Changelog on: - pull_request_target: + pull_request: types: [opened, synchronize, reopened, labeled, unlabeled] # Required by GitHub merge queue due to branch protection rules. Should always be successful @@ -44,31 +44,14 @@ jobs: echo "merge_group event – passing without running changelog validation." 
exit 0 - # Checkout changelog script and changelog.d/ from master + # Checkout PR branch (includes script and changelog.d/) - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 if: env.SHOULD_RUN == 'true' with: - ref: master - sparse-checkout: | - scripts/check_changelog_fragments.sh - changelog.d/ - sparse-checkout-cone-mode: false - - # Checkout PR's changelog.d/ into tmp/ - - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - if: env.SHOULD_RUN == 'true' - with: - repository: ${{ github.event.pull_request.head.repo.full_name }} - ref: ${{ github.event.pull_request.head.sha }} - path: tmp - sparse-checkout: changelog.d/ + fetch-depth: 0 - name: Run changelog fragment checker if: env.SHOULD_RUN == 'true' run: | - # Overwrite changelog.d/*.md - rm -rf changelog.d/*.md && mv tmp/changelog.d/*.md changelog.d/ - - # Add files and then compare with HEAD instead of origin/master - git add changelog.d/ - MERGE_BASE=HEAD ./scripts/check_changelog_fragments.sh + # Compare against origin/master + ./scripts/check_changelog_fragments.sh diff --git a/.github/workflows/changes.yml b/.github/workflows/changes.yml index 256b74969cab6..25599942b02c9 100644 --- a/.github/workflows/changes.yml +++ b/.github/workflows/changes.yml @@ -149,6 +149,11 @@ on: value: ${{ jobs.int_tests.outputs.any }} e2e-tests-any: value: ${{ jobs.e2e_tests.outputs.any }} + +# Workflow-level permissions - read access to repository contents +permissions: + contents: read # Required to checkout code + env: BASE_SHA: ${{ inputs.base_ref || (github.event_name == 'merge_group' && github.event.merge_group.base_sha) || github.event.pull_request.base.sha }} HEAD_SHA: ${{ inputs.head_ref || (github.event_name == 'merge_group' && github.event.merge_group.head_sha) || github.event.pull_request.head.sha }} @@ -222,7 +227,11 @@ jobs: component_docs: - 'scripts/generate-component-docs.rb' - "vdev/**" - - 'website/cue/**/base/**.cue' + - 'website/cue/**/*.cue' + - 
'docs/generated/**' + # If changes to the VRL sha is made the combined generated cue file will change which + # may cause issues + - 'Cargo.lock' - ".github/workflows/changes.yml" markdown: - '**/**.md' @@ -282,8 +291,10 @@ jobs: # Detects changes that are specific to integration tests int_tests: runs-on: ubuntu-24.04 - timeout-minutes: 5 + timeout-minutes: 15 if: ${{ inputs.int_tests }} + permissions: + contents: read outputs: amqp: ${{ steps.filter.outputs.amqp }} appsignal: ${{ steps.filter.outputs.appsignal}} @@ -398,7 +409,7 @@ jobs: echo "any=$any_changed" >> $GITHUB_OUTPUT - name: Upload JSON artifact - uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0 + uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0 with: name: int_tests_changes path: int_tests_changes.json @@ -406,8 +417,10 @@ jobs: # Detects changes that are specific to e2e tests e2e_tests: runs-on: ubuntu-24.04 - timeout-minutes: 5 + timeout-minutes: 15 if: ${{ inputs.e2e_tests }} + permissions: + contents: read outputs: datadog-logs: ${{ steps.filter.outputs.datadog-logs }} datadog-metrics: ${{ steps.filter.outputs.datadog-metrics }} @@ -458,7 +471,7 @@ jobs: echo "any=$any_changed" >> $GITHUB_OUTPUT - name: Upload JSON artifact - uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0 + uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0 with: name: e2e_tests_changes path: e2e_tests_changes.json diff --git a/.github/workflows/check_generated_vrl_docs.yml b/.github/workflows/check_generated_vrl_docs.yml new file mode 100644 index 0000000000000..7407b28260685 --- /dev/null +++ b/.github/workflows/check_generated_vrl_docs.yml @@ -0,0 +1,135 @@ +name: Check Generated VRL Docs Freshness + +on: + pull_request: + merge_group: + types: [checks_requested] + push: + branches: + - master + +concurrency: + group: ${{ github.workflow }}-${{ github.event.number || github.sha }} + cancel-in-progress: 
true + +env: + COMMIT_MESSAGE: "Update generated VRL docs" + COMMIT_AUTHOR: "github-actions[bot]" + +permissions: + contents: read + +jobs: + changes: + runs-on: ubuntu-latest + permissions: + contents: read + outputs: + docs: ${{ steps.filter.outputs.docs }} + steps: + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + - uses: dorny/paths-filter@de90cc6fb38fc0963ad72b210f1f284cd68cea36 # v3.0.2 + id: filter + with: + filters: | + docs: + - "lib/vector-vrl/**" + - "vdev/**" + - "Cargo.lock" + - ".github/workflows/check_generated_vrl_docs.yml" + + run-check-generated-vrl-docs: + needs: changes + if: needs.changes.outputs.docs == 'true' + runs-on: ubuntu-24.04-8core + permissions: + contents: write + steps: + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + ref: ${{ github.event.pull_request.head.sha || github.sha }} + + - uses: ./.github/actions/setup + with: + vdev: true + mold: false + cargo-cache: false + + - name: Regenerate VRL docs + run: make generate-vector-vrl-docs + + - name: Check for changes + id: check + run: | + git add docs/generated/ + if git diff --cached --quiet; then + echo "changed=false" >> "$GITHUB_OUTPUT" + else + echo "changed=true" >> "$GITHUB_OUTPUT" + fi + + - name: Check last commit + if: steps.check.outputs.changed == 'true' + id: last-commit + run: | + MSG=$(git log -1 --pretty=%s) + AUTHOR=$(git log -1 --pretty=%an) + if [ "$MSG" = "$COMMIT_MESSAGE" ] && [ "$AUTHOR" = "$COMMIT_AUTHOR" ]; then + echo "is-auto=true" >> "$GITHUB_OUTPUT" + else + echo "is-auto=false" >> "$GITHUB_OUTPUT" + fi + + - name: Commit and push + if: > + steps.check.outputs.changed == 'true' + && steps.last-commit.outputs.is-auto != 'true' + && github.event_name == 'pull_request' + && github.event.pull_request.head.repo.full_name == github.repository + id: push + continue-on-error: true + env: + HEAD_REF: ${{ github.head_ref }} + run: | + git config user.name "github-actions[bot]" + git config user.email 
"github-actions[bot]@users.noreply.github.com" + git commit -m "$COMMIT_MESSAGE" + git push origin HEAD:refs/heads/$HEAD_REF + + - name: Save PR number for comment workflow + if: > + steps.check.outputs.changed == 'true' + && steps.last-commit.outputs.is-auto != 'true' + && github.event_name == 'pull_request' + && steps.push.outcome != 'success' + run: | + mkdir -p /tmp/docs-check + echo "${{ github.event.pull_request.number }}" > /tmp/docs-check/pr-number + + - name: Upload PR metadata + if: > + steps.check.outputs.changed == 'true' + && steps.last-commit.outputs.is-auto != 'true' + && github.event_name == 'pull_request' + && steps.push.outcome != 'success' + uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0 + with: + name: vrl-docs-check-pr + path: /tmp/docs-check/pr-number + + - name: Fail if docs are out of date + if: steps.check.outputs.changed == 'true' + run: | + echo "docs/generated/ is out of date. Regenerate with: make generate-vector-vrl-docs" + exit 1 + + check-generated-vrl-docs: + if: always() + runs-on: ubuntu-latest + needs: run-check-generated-vrl-docs + steps: + - run: | + if [[ "${{ contains(needs.*.result, 'failure') || contains(needs.*.result, 'cancelled') }}" == "true" ]]; then + echo "One or more jobs failed or were cancelled" + exit 1 + fi diff --git a/.github/workflows/check_generated_vrl_docs_comment.yml b/.github/workflows/check_generated_vrl_docs_comment.yml new file mode 100644 index 0000000000000..25eed42cfeb28 --- /dev/null +++ b/.github/workflows/check_generated_vrl_docs_comment.yml @@ -0,0 +1,62 @@ +name: Comment on PR (Generated VRL Docs) + +on: + workflow_run: + workflows: ["Check Generated VRL Docs Freshness"] + types: + - completed + +permissions: + contents: read + +jobs: + comment: + runs-on: ubuntu-latest + if: > + github.event.workflow_run.conclusion == 'failure' + && github.event.workflow_run.event == 'pull_request' + permissions: + pull-requests: write + steps: + - name: Download PR metadata + 
uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0 + with: + name: vrl-docs-check-pr + run-id: ${{ github.event.workflow_run.id }} + github-token: ${{ github.token }} + + - name: Comment on PR + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0 + with: + script: | + const fs = require('fs'); + const prNumber = parseInt(fs.readFileSync('pr-number', 'utf8').trim(), 10); + if (isNaN(prNumber)) { + core.setFailed('Invalid PR number'); + return; + } + + const marker = '<!-- vrl-docs-check -->'; + const body = marker + '\nThe `docs/generated/` folder is out of date but I was unable to push the fix automatically.\n\nPlease run the following and commit the result:\n\n```\nmake generate-vector-vrl-docs\n```'; + + const { data: comments } = await github.rest.issues.listComments({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: prNumber, + }); + const existing = comments.find(c => c.body.startsWith(marker)); + if (existing) { + await github.rest.issues.updateComment({ + owner: context.repo.owner, + repo: context.repo.repo, + comment_id: existing.id, + body, + }); + } else { + await github.rest.issues.createComment({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: prNumber, + body, + }); + } diff --git a/.github/workflows/ci-integration-review.yml b/.github/workflows/ci-integration-review.yml index d634a98391c13..1195b4372d97b 100644 --- a/.github/workflows/ci-integration-review.yml +++ b/.github/workflows/ci-integration-review.yml @@ -33,8 +33,7 @@ on: types: [ submitted ] permissions: - statuses: write - packages: write + contents: read env: AXIOM_TOKEN: ${{ secrets.AXIOM_TOKEN }} @@ -55,6 +54,8 @@ jobs: runs-on: ubuntu-24.04 timeout-minutes: 5 if: startsWith(github.event.review.body, '/ci-run-integration') || startsWith(github.event.review.body, '/ci-run-e2e') || contains(github.event.review.body, '/ci-run-all') + permissions: + statuses: write # Required to set commit status to pending 
steps: - name: Generate authentication token id: generate_token @@ -83,6 +84,8 @@ jobs: build-test-runner: needs: prep-pr + permissions: + packages: write # Required to push test runner image to GHCR uses: ./.github/workflows/build-test-runner.yml with: commit_sha: ${{ github.event.review.commit_id }} @@ -94,6 +97,8 @@ jobs: - build-test-runner runs-on: ubuntu-24.04 timeout-minutes: 90 + permissions: + packages: read # Required to pull test runner image from GHCR strategy: fail-fast: false matrix: @@ -134,6 +139,8 @@ jobs: - build-test-runner runs-on: ubuntu-24.04-8core timeout-minutes: 30 + permissions: + packages: read # Required to pull test runner image from GHCR strategy: fail-fast: false matrix: @@ -173,6 +180,8 @@ jobs: - integration-tests - e2e-tests if: always() && (startsWith(github.event.review.body, '/ci-run-integration') || contains(github.event.review.body, '/ci-run-all') || contains(github.event.review.body, '/ci-run-e2e')) + permissions: + statuses: write # Required to set final commit status env: FAILED: ${{ contains(needs.*.result, 'failure') || contains(needs.*.result, 'cancelled') }} steps: diff --git a/.github/workflows/cla.yml b/.github/workflows/cla.yml index c2693a2d33a18..2f48f1f1d1edf 100644 --- a/.github/workflows/cla.yml +++ b/.github/workflows/cla.yml @@ -9,13 +9,14 @@ on: permissions: contents: read - pull-requests: write - id-token: write # Needed to federate tokens. 
- actions: write jobs: CLAAssistant: runs-on: ubuntu-latest + permissions: + pull-requests: write # Required to comment on PRs + id-token: write # Required to federate tokens with dd-octo-sts-action + actions: write # Required to create/update workflow runs steps: - name: CLA already verified on PR if: github.event_name == 'merge_group' diff --git a/.github/workflows/cleanup-ghcr-images.yml b/.github/workflows/cleanup-ghcr-images.yml index 709190f9af108..c11ecbf6a0c9d 100644 --- a/.github/workflows/cleanup-ghcr-images.yml +++ b/.github/workflows/cleanup-ghcr-images.yml @@ -14,14 +14,13 @@ on: workflow_dispatch: permissions: - contents: read + contents: read # Restrictive default jobs: cleanup: runs-on: ubuntu-latest permissions: - packages: write - contents: read + packages: write # Required to delete package versions from GHCR steps: - name: Delete untagged vector images uses: actions/delete-package-versions@e5bc658cc4c965c472efe991f8beea3981499c55 # v5.0.0 diff --git a/.github/workflows/compilation-timings.yml b/.github/workflows/compilation-timings.yml deleted file mode 100644 index dec323e8185fc..0000000000000 --- a/.github/workflows/compilation-timings.yml +++ /dev/null @@ -1,74 +0,0 @@ -# Executes various builds of vector to time the results in order to track compilation times. -# -# This workflow is unrelated to the Regression workflow. 
- -name: Compilation Timings - -on: - workflow_dispatch: - -env: - CI: true - -jobs: - release-build-optimized: - name: "Release Build (optimized)" - runs-on: ubuntu-24.04-8core - steps: - - uses: colpal/actions-clean@36e6ca1abd35efe61cb60f912bd7837f67887c8a # v1.1.1 - - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - - run: sudo -E bash scripts/environment/bootstrap-ubuntu-24.04.sh - - run: bash scripts/environment/prepare.sh --modules=rustup - - run: cargo clean - - run: cargo build --release - - release-build-normal: - name: "Release Build (normal)" - runs-on: ubuntu-24.04-8core - env: - # We're not actually doing a debug build, we're just turning off the logic - # in release-flags.sh so that we don't override the Cargo "release" profile - # with full LTO / single codegen unit. - PROFILE: debug - steps: - - uses: colpal/actions-clean@36e6ca1abd35efe61cb60f912bd7837f67887c8a # v1.1.1 - - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - - run: sudo -E bash scripts/environment/bootstrap-ubuntu-24.04.sh - - run: bash scripts/environment/prepare.sh --modules=rustup - - run: cargo clean - - run: cargo build --release - - debug-build: - name: "Debug Build" - runs-on: ubuntu-24.04-8core - steps: - - uses: colpal/actions-clean@36e6ca1abd35efe61cb60f912bd7837f67887c8a # v1.1.1 - - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - - run: sudo -E bash scripts/environment/bootstrap-ubuntu-24.04.sh - - run: bash scripts/environment/prepare.sh --modules=rustup - - run: cargo clean - - run: cargo build - - debug-rebuild: - name: "Debug Rebuild" - runs-on: ubuntu-24.04-8core - steps: - - uses: colpal/actions-clean@36e6ca1abd35efe61cb60f912bd7837f67887c8a # v1.1.1 - - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - - run: sudo -E bash scripts/environment/bootstrap-ubuntu-24.04.sh - - run: bash scripts/environment/prepare.sh --modules=rustup - - run: cargo clean - - run: cargo 
build - - run: touch src/app.rs - - run: cargo build - - check: - name: "Cargo Check" - runs-on: ubuntu-24.04-8core - steps: - - uses: colpal/actions-clean@36e6ca1abd35efe61cb60f912bd7837f67887c8a # v1.1.1 - - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - - run: sudo -E bash scripts/environment/bootstrap-ubuntu-24.04.sh - - run: bash scripts/environment/prepare.sh --modules=rustup - - run: cargo clean - - run: cargo check diff --git a/.github/workflows/component_features.yml b/.github/workflows/component_features.yml index 153bbd99612f7..9770b4ab81383 100644 --- a/.github/workflows/component_features.yml +++ b/.github/workflows/component_features.yml @@ -39,6 +39,10 @@ jobs: with: ref: ${{ inputs.ref }} + - name: Free disk space + shell: bash + run: sudo -E bash scripts/ci-free-disk-space.sh + - uses: ./.github/actions/setup with: rust: true diff --git a/.github/workflows/create_preview_sites.yml b/.github/workflows/create_preview_sites.yml index 4440b984300c0..dbb93e6432a33 100644 --- a/.github/workflows/create_preview_sites.yml +++ b/.github/workflows/create_preview_sites.yml @@ -23,15 +23,18 @@ on: required: true permissions: - issues: write - pull-requests: write - statuses: write - actions: read + contents: read # Restrictive default jobs: create_preview_site: runs-on: ubuntu-24.04 timeout-minutes: 5 + permissions: + contents: read # Required for repository context + actions: read # Required to download artifacts + issues: write # Required to post preview link comments + pull-requests: write # Required to post preview link comments + statuses: write # Required to update commit status steps: # Get the artifacts with the PR number and branch name - name: Download artifact diff --git a/.github/workflows/cross.yml b/.github/workflows/cross.yml index 64da74d3f4877..8cf8706b1a42c 100644 --- a/.github/workflows/cross.yml +++ b/.github/workflows/cross.yml @@ -55,7 +55,7 @@ jobs: # aarch64 and musl in particular are notoriously hard to 
link. # While it may be tempting to slot a `check` in here for quickness, please don't. - run: make cross-build-${{ matrix.target }} - - uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0 + - uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0 with: name: "vector-debug-${{ matrix.target }}" path: "./target/${{ matrix.target }}/debug/vector" diff --git a/.github/workflows/custom_builds.yml b/.github/workflows/custom_builds.yml index b20b80c8330d9..98a7b3647295f 100644 --- a/.github/workflows/custom_builds.yml +++ b/.github/workflows/custom_builds.yml @@ -1,14 +1,16 @@ name: Custom Builds permissions: - contents: write - packages: write + contents: read # Restrictive default on: workflow_dispatch: {} jobs: Custom: + permissions: + contents: write # Required to create/update releases and tags + packages: write # Required to push container images to GHCR uses: ./.github/workflows/publish.yml with: git_ref: ${{ github.ref }} diff --git a/.github/workflows/gardener_issue_comment.yml b/.github/workflows/gardener_issue_comment.yml deleted file mode 100644 index b9936a1edecc4..0000000000000 --- a/.github/workflows/gardener_issue_comment.yml +++ /dev/null @@ -1,131 +0,0 @@ -# Gardener Issue Comment -# -# This workflow moves GH issues from the Gardener board's "Blocked / Waiting" column -# to "Triage", when a comment is posted on an issue from a non-team member -# so that the Gardener can assess the issue in light of new information. 
- -name: Gardener Issue Comment - -on: - issue_comment: - types: [created] - -jobs: - move-to-backlog: - name: Move issues back to Gardener project board Triage - runs-on: ubuntu-24.04 - timeout-minutes: 5 - if: ${{ !github.event.issue.pull_request }} - steps: - - name: Generate authentication token - id: generate_token - uses: tibdex/github-app-token@3beb63f4bd073e61482598c45c71c1019b59b73a - with: - app_id: ${{ secrets.GH_APP_DATADOG_VECTOR_CI_APP_ID }} - private_key: ${{ secrets.GH_APP_DATADOG_VECTOR_CI_APP_PRIVATE_KEY }} - - - name: Get PR comment author - id: comment - uses: tspascoal/get-user-teams-membership@57e9f42acd78f4d0f496b3be4368fc5f62696662 # v3.0.0 - with: - username: ${{ github.actor }} - team: 'Vector' - GITHUB_TOKEN: ${{ steps.generate_token.outputs.token }} - - - name: Move issue back to Triage if status is Blocked/Waiting - if: steps.comment.outputs.isTeamMember == 'false' - env: - GH_TOKEN: ${{ secrets.GH_PROJECT_PAT }} - run: | - issue_id=${{ github.event.issue.node_id }} - echo "issue_id: $issue_id" - - # IDs fetched from https://docs.github.com/en/graphql/overview/explorer - project_id="PVT_kwDOAQFeYs4AAsTr" # Gardener - status_field_id="PVTF_lADOAQFeYs4AAsTrzgAXRuU" # Status - triage_option_id="2a08fafa" - - # Query for project items for the given issue - project_items="$(gh api graphql -f query=' - query($item_id: ID!) { - node(id: $item_id) { - ... on Issue { - projectItems(first: 50) { - ... on ProjectV2ItemConnection { - nodes { - fieldValueByName(name: "Status") { - ... on ProjectV2ItemFieldSingleSelectValue { - name - } - } - ... on ProjectV2Item { - id - project { - ... on ProjectV2 { - id - } - } - } - } - } - } - } - ... on PullRequest { - projectItems(first: 50) { - ... on ProjectV2ItemConnection { - nodes { - fieldValueByName(name: "Status") { - ... on ProjectV2ItemFieldSingleSelectValue { - name - } - } - ... on ProjectV2Item { - id - project { - ... 
on ProjectV2 { - id - } - } - } - } - } - } - } - } - }' -f item_id="$issue_id" - )" - - # Extract the item in the Gardener project - project=$(echo $project_items | jq -c -r --arg project_id $project_id '.data.node.projectItems.nodes[] | select(.project.id == $project_id)') - current_status=$(echo $project | jq -c -r '.fieldValueByName.name') - item_id=$(echo $project | jq -c '.id') - - if [ -z "$current_status" ] ; then - echo "Issue not found in Gardener board" - exit 0 - else - echo "Found issue on Gardener board. Current issue status is: '${current_status}'" - fi - - if [ "$current_status" = "Blocked / Waiting" ] ; then - echo "Moving issue from 'Blocked / Waiting' to 'Triage'" - gh api graphql -f query=' - mutation($project_id: ID!, $item_id: ID!, $field_id: ID!, $option_id: String) { - updateProjectV2ItemFieldValue( - input: { - projectId: $project_id - itemId: $item_id - fieldId: $field_id - value: { - singleSelectOptionId: $option_id - } - } - ) { - projectV2Item { - id - } - } - }' -f project_id="$project_id" -f item_id="$item_id" -f field_id="$status_field_id" -f option_id="$triage_option_id" - else - echo "Issue is in '${current_status}', not moving." 
- fi diff --git a/.github/workflows/gardener_open_issue.yml b/.github/workflows/gardener_open_issue.yml deleted file mode 100644 index 87389577949ac..0000000000000 --- a/.github/workflows/gardener_open_issue.yml +++ /dev/null @@ -1,21 +0,0 @@ -# Add new issues Gardener project board for triage -name: Add issue to Gardener board - -on: - issues: - types: - - opened - -permissions: - contents: read - -jobs: - add-to-project: - name: Add issue to Gardener project board - runs-on: ubuntu-24.04 - timeout-minutes: 5 - steps: - - uses: actions/add-to-project@244f685bbc3b7adfa8466e08b698b5577571133e # v1.0.2 - with: - project-url: https://github.com/orgs/vectordotdev/projects/49 - github-token: ${{ secrets.GH_PROJECT_PAT }} diff --git a/.github/workflows/gardener_open_pr.yml b/.github/workflows/gardener_open_pr.yml deleted file mode 100644 index 873c9da7e26ba..0000000000000 --- a/.github/workflows/gardener_open_pr.yml +++ /dev/null @@ -1,43 +0,0 @@ -# Add new pull requests to Gardener project board for triage -name: Add PR to Gardener board - -on: - pull_request_target: - types: - - opened - - reopened - -jobs: - add-contributor-to-project: - name: Add contributor PR to Gardener project board - runs-on: ubuntu-24.04 - timeout-minutes: 5 - if: ${{ github.actor != 'dependabot[bot]' }} - steps: - - name: Generate authentication token - id: generate_token - uses: tibdex/github-app-token@3beb63f4bd073e61482598c45c71c1019b59b73a - with: - app_id: ${{ secrets.GH_APP_DATADOG_VECTOR_CI_APP_ID }} - private_key: ${{ secrets.GH_APP_DATADOG_VECTOR_CI_APP_PRIVATE_KEY }} - - uses: tspascoal/get-user-teams-membership@57e9f42acd78f4d0f496b3be4368fc5f62696662 # v3.0.0 - id: checkVectorMember - with: - username: ${{ github.actor }} - team: vector - GITHUB_TOKEN: ${{ steps.generate_token.outputs.token }} - - uses: actions/add-to-project@244f685bbc3b7adfa8466e08b698b5577571133e # v1.0.2 - if: ${{ steps.checkVectorMember.outputs.isTeamMember == 'false' }} - with: - project-url: 
https://github.com/orgs/vectordotdev/projects/49 - github-token: ${{ secrets.GH_PROJECT_PAT }} - add-dependabot-to-project: - name: Add dependabot PR to Gardener project board - runs-on: ubuntu-24.04 - timeout-minutes: 5 - if: ${{ github.actor == 'dependabot[bot]' }} - steps: - - uses: actions/add-to-project@244f685bbc3b7adfa8466e08b698b5577571133e # v1.0.2 - with: - project-url: https://github.com/orgs/vectordotdev/projects/49 - github-token: ${{ secrets.GH_PROJECT_PAT }} diff --git a/.github/workflows/integration-test.yml b/.github/workflows/integration-test.yml index 838c922e63fae..48b4deeb218d3 100644 --- a/.github/workflows/integration-test.yml +++ b/.github/workflows/integration-test.yml @@ -14,6 +14,10 @@ on: required: true type: string +# Workflow-level permissions - read access to repository contents +permissions: + contents: read # Required to checkout code + env: AXIOM_TOKEN: ${{ secrets.AXIOM_TOKEN }} TEST_APPSIGNAL_PUSH_API_KEY: ${{ secrets.TEST_APPSIGNAL_PUSH_API_KEY }} diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 0ce2ffbbb5073..20fae17be16ae 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -18,7 +18,6 @@ concurrency: cancel-in-progress: true permissions: - packages: write contents: read env: @@ -45,6 +44,10 @@ jobs: secrets: inherit build-test-runner: + # Elevated permission required by build-test-runner.yml to push test runner image to GHCR + permissions: + contents: read + packages: write needs: changes if: ${{ always() && @@ -56,7 +59,6 @@ jobs: needs.changes.outputs.int-tests-any == 'true' || needs.changes.outputs.e2e-tests-any == 'true'))) }} - uses: ./.github/workflows/build-test-runner.yml with: commit_sha: ${{ github.sha }} @@ -86,8 +88,14 @@ jobs: with: submodules: "recursive" + - uses: ./.github/actions/setup + with: + vdev: true + mold: false + cargo-cache: false + - name: Download JSON artifact from changes.yml - uses: 
actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0 + uses: actions/download-artifact@70fc10c6e5e1ce46ad2ea6f2b72d43f7d47b13c3 # v8.0.0 if: github.event_name == 'merge_group' with: name: int_tests_changes @@ -148,8 +156,14 @@ jobs: with: submodules: "recursive" + - uses: ./.github/actions/setup + with: + vdev: true + mold: false + cargo-cache: false + - name: Download JSON artifact from changes.yml - uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0 + uses: actions/download-artifact@70fc10c6e5e1ce46ad2ea6f2b72d43f7d47b13c3 # v8.0.0 if: github.event_name == 'merge_group' with: name: e2e_tests_changes @@ -197,6 +211,7 @@ jobs: runs-on: ubuntu-24.04 if: always() needs: + - changes - build-test-runner - integration-tests - e2e-tests diff --git a/.github/workflows/integration_windows.yml b/.github/workflows/integration_windows.yml new file mode 100644 index 0000000000000..ca60a57c1f122 --- /dev/null +++ b/.github/workflows/integration_windows.yml @@ -0,0 +1,56 @@ +name: Integration - Windows + +on: + workflow_dispatch: + pull_request: + +permissions: + contents: read + +jobs: + changes: + runs-on: ubuntu-latest + permissions: + contents: read + outputs: + windows: ${{ steps.filter.outputs.windows }} + steps: + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + - uses: dorny/paths-filter@v3 + id: filter + with: + filters: | + windows: + - "src/sources/windows_event_log/**" + - "src/internal_events/windows_event_log.rs" + - "tests/integration/windows-event-log/**" + - ".github/workflows/integration_windows.yml" + + run-test-integration-windows: + needs: changes + if: needs.changes.outputs.windows == 'true' + runs-on: windows-2025-8core + timeout-minutes: 60 + steps: + - name: Checkout branch + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + + - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 + with: + python-version: "3.10" + + - run: 
.\scripts\environment\bootstrap-windows-2025.ps1 + + - name: Run Windows Event Log integration tests + run: make test-integration-windows-event-log + + test-integration-windows: + if: always() + runs-on: ubuntu-latest + needs: run-test-integration-windows + steps: + - run: | + if [[ "${{ contains(needs.*.result, 'failure') || contains(needs.*.result, 'cancelled') }}" == "true" ]]; then + echo "One or more jobs failed or were cancelled" + exit 1 + fi diff --git a/.github/workflows/k8s_e2e.yml b/.github/workflows/k8s_e2e.yml index f75d470c46c22..0aa171f0daacd 100644 --- a/.github/workflows/k8s_e2e.yml +++ b/.github/workflows/k8s_e2e.yml @@ -88,10 +88,14 @@ jobs: rust: true cross: true mold: false + cargo-deb: true + + - name: Install packaging dependencies + run: sudo apt-get install -y cmark-gfm - run: VECTOR_VERSION="$(vdev version)" make package-deb-x86_64-unknown-linux-gnu - - uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0 + - uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0 with: name: e2e-test-deb-package path: target/artifacts/* @@ -175,7 +179,13 @@ jobs: with: ref: ${{ inputs.ref }} - - uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0 + - uses: ./.github/actions/setup + with: + vdev: true + mold: false + cargo-cache: false + + - uses: actions/download-artifact@70fc10c6e5e1ce46ad2ea6f2b72d43f7d47b13c3 # v8.0.0 with: name: e2e-test-deb-package path: target/artifacts diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index ec200153b318c..848bed90f8299 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -1,8 +1,7 @@ name: Nightly permissions: - contents: write - packages: write + contents: read # Restrictive default on: workflow_dispatch: @@ -11,6 +10,9 @@ on: jobs: Nightly: + permissions: + contents: write # Required to create/update releases and tags + packages: write # Required to push container images to GHCR 
uses: ./.github/workflows/publish.yml with: git_ref: ${{ github.ref }} diff --git a/.github/workflows/preview_site_trigger.yml b/.github/workflows/preview_site_trigger.yml index b1e1838b5c49f..08ff26836ca20 100644 --- a/.github/workflows/preview_site_trigger.yml +++ b/.github/workflows/preview_site_trigger.yml @@ -2,10 +2,18 @@ name: Call Build Preview on: pull_request: types: [opened, reopened, synchronize] + +# Restrictive default permissions +permissions: + contents: read # Required for repository context + jobs: approval_check: runs-on: ubuntu-24.04 timeout-minutes: 5 + permissions: + contents: read # Required for repository context + actions: write # Required to upload artifacts that trigger the preview build workflow # Only run for PRs with 'website' in the branch name if: ${{ contains(github.head_ref, 'website') }} steps: @@ -46,7 +54,7 @@ jobs: # Upload the artifact using latest version (only if branch is valid) - name: Upload PR information artifact if: steps.validate.outputs.valid == 'true' - uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0 + uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0 with: name: pr path: pr/ diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 3e33c58119644..3c533c593aeb6 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -1,8 +1,7 @@ name: Publish permissions: - contents: write - packages: write + contents: read on: workflow_call: @@ -30,7 +29,7 @@ jobs: generate-publish-metadata: name: Generate Publish-related Metadata runs-on: ubuntu-24.04 - timeout-minutes: 5 + timeout-minutes: 15 outputs: vector_version: ${{ steps.generate-publish-metadata.outputs.vector_version }} vector_build_desc: ${{ steps.generate-publish-metadata.outputs.vector_build_desc }} @@ -40,204 +39,16 @@ jobs: uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: ref: ${{ inputs.git_ref }} + + - uses: 
./.github/actions/setup + with: + vdev: true - name: Generate publish metadata id: generate-publish-metadata run: make ci-generate-publish-metadata - build-x86_64-unknown-linux-musl-packages: - name: Build Vector for x86_64-unknown-linux-musl (.tar.gz) - runs-on: release-builder-linux - timeout-minutes: 60 - needs: generate-publish-metadata - env: - VECTOR_VERSION: ${{ needs.generate-publish-metadata.outputs.vector_version }} - VECTOR_BUILD_DESC: ${{ needs.generate-publish-metadata.outputs.vector_build_desc }} - CHANNEL: ${{ needs.generate-publish-metadata.outputs.vector_release_channel }} - steps: - - name: Checkout Vector - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - with: - ref: ${{ inputs.git_ref }} - - name: Bootstrap runner environment (Ubuntu-specific) - run: sudo -E bash scripts/environment/bootstrap-ubuntu-24.04.sh - - name: Bootstrap runner environment (generic) - run: bash scripts/environment/prepare.sh --modules=rustup,cross - - name: Build Vector - run: make package-x86_64-unknown-linux-musl-all - - name: Stage package artifacts for publish - uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0 - with: - name: vector-${{ env.VECTOR_VERSION }}-x86_64-unknown-linux-musl - path: target/artifacts/vector* - - build-x86_64-unknown-linux-gnu-packages: - name: Build Vector for x86_64-unknown-linux-gnu (.tar.gz, DEB, RPM) - runs-on: release-builder-linux - timeout-minutes: 60 - needs: generate-publish-metadata - env: - VECTOR_VERSION: ${{ needs.generate-publish-metadata.outputs.vector_version }} - VECTOR_BUILD_DESC: ${{ needs.generate-publish-metadata.outputs.vector_build_desc }} - CHANNEL: ${{ needs.generate-publish-metadata.outputs.vector_release_channel }} - steps: - - name: Checkout Vector - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - with: - ref: ${{ inputs.git_ref }} - - name: Bootstrap runner environment (Ubuntu-specific) - run: sudo -E bash 
scripts/environment/bootstrap-ubuntu-24.04.sh - - name: Bootstrap runner environment (generic) - run: bash scripts/environment/prepare.sh --modules=rustup,cross - - name: Build Vector - run: make package-x86_64-unknown-linux-gnu-all - - name: Stage package artifacts for publish - uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0 - with: - name: vector-${{ env.VECTOR_VERSION }}-x86_64-unknown-linux-gnu - path: target/artifacts/vector* - - build-aarch64-unknown-linux-musl-packages: - name: Build Vector for aarch64-unknown-linux-musl (.tar.gz) - runs-on: release-builder-linux - timeout-minutes: 60 - needs: generate-publish-metadata - env: - VECTOR_VERSION: ${{ needs.generate-publish-metadata.outputs.vector_version }} - VECTOR_BUILD_DESC: ${{ needs.generate-publish-metadata.outputs.vector_build_desc }} - CHANNEL: ${{ needs.generate-publish-metadata.outputs.vector_release_channel }} - steps: - - name: Checkout Vector - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - with: - ref: ${{ inputs.git_ref }} - - name: Bootstrap runner environment (Ubuntu-specific) - run: sudo -E bash scripts/environment/bootstrap-ubuntu-24.04.sh - - name: Bootstrap runner environment (generic) - run: bash scripts/environment/prepare.sh --modules=rustup,cross - - name: Build Vector - env: - DOCKER_PRIVILEGED: "true" - run: make package-aarch64-unknown-linux-musl-all - - name: Stage package artifacts for publish - uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0 - with: - name: vector-${{ env.VECTOR_VERSION }}-aarch64-unknown-linux-musl - path: target/artifacts/vector* - - build-aarch64-unknown-linux-gnu-packages: - name: Build Vector for aarch64-unknown-linux-gnu (.tar.gz) - runs-on: release-builder-linux - timeout-minutes: 60 - needs: generate-publish-metadata - env: - VECTOR_VERSION: ${{ needs.generate-publish-metadata.outputs.vector_version }} - VECTOR_BUILD_DESC: ${{ 
needs.generate-publish-metadata.outputs.vector_build_desc }} - CHANNEL: ${{ needs.generate-publish-metadata.outputs.vector_release_channel }} - steps: - - name: Checkout Vector - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - with: - ref: ${{ inputs.git_ref }} - - name: Bootstrap runner environment (Ubuntu-specific) - run: sudo -E bash scripts/environment/bootstrap-ubuntu-24.04.sh - - name: Bootstrap runner environment (generic) - run: bash scripts/environment/prepare.sh --modules=rustup,cross - - name: Build Vector - env: - DOCKER_PRIVILEGED: "true" - run: make package-aarch64-unknown-linux-gnu-all - - name: Stage package artifacts for publish - uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0 - with: - name: vector-${{ env.VECTOR_VERSION }}-aarch64-unknown-linux-gnu - path: target/artifacts/vector* - - build-armv7-unknown-linux-gnueabihf-packages: - name: Build Vector for armv7-unknown-linux-gnueabihf (.tar.gz) - runs-on: release-builder-linux - timeout-minutes: 60 - needs: generate-publish-metadata - env: - VECTOR_VERSION: ${{ needs.generate-publish-metadata.outputs.vector_version }} - VECTOR_BUILD_DESC: ${{ needs.generate-publish-metadata.outputs.vector_build_desc }} - CHANNEL: ${{ needs.generate-publish-metadata.outputs.vector_release_channel }} - steps: - - name: Checkout Vector - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - with: - ref: ${{ inputs.git_ref }} - - name: Bootstrap runner environment (Ubuntu-specific) - run: sudo -E bash scripts/environment/bootstrap-ubuntu-24.04.sh - - name: Bootstrap runner environment (generic) - run: bash scripts/environment/prepare.sh --modules=rustup,cross - - name: Build Vector - env: - DOCKER_PRIVILEGED: "true" - run: make package-armv7-unknown-linux-gnueabihf-all - - name: Stage package artifacts for publish - uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0 - with: - name: vector-${{ 
env.VECTOR_VERSION }}-armv7-unknown-linux-gnueabihf - path: target/artifacts/vector* - - build-armv7-unknown-linux-musleabihf-packages: - name: Build Vector for armv7-unknown-linux-musleabihf (.tar.gz) - runs-on: release-builder-linux - timeout-minutes: 60 - needs: generate-publish-metadata - env: - VECTOR_VERSION: ${{ needs.generate-publish-metadata.outputs.vector_version }} - VECTOR_BUILD_DESC: ${{ needs.generate-publish-metadata.outputs.vector_build_desc }} - CHANNEL: ${{ needs.generate-publish-metadata.outputs.vector_release_channel }} - steps: - - name: Checkout Vector - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - with: - ref: ${{ inputs.git_ref }} - - name: Bootstrap runner environment (Ubuntu-specific) - run: sudo -E bash scripts/environment/bootstrap-ubuntu-24.04.sh - - name: Bootstrap runner environment (generic) - run: bash scripts/environment/prepare.sh --modules=rustup,cross - - name: Build Vector - env: - DOCKER_PRIVILEGED: "true" - run: make package-armv7-unknown-linux-musleabihf - - name: Stage package artifacts for publish - uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0 - with: - name: vector-${{ env.VECTOR_VERSION }}-armv7-unknown-linux-musleabihf - path: target/artifacts/vector* - - build-arm-unknown-linux-gnueabi-packages: - name: Build Vector for arm-unknown-linux-gnueabi (.tar.gz) - runs-on: release-builder-linux - timeout-minutes: 60 - needs: generate-publish-metadata - env: - VECTOR_VERSION: ${{ needs.generate-publish-metadata.outputs.vector_version }} - VECTOR_BUILD_DESC: ${{ needs.generate-publish-metadata.outputs.vector_build_desc }} - CHANNEL: ${{ needs.generate-publish-metadata.outputs.vector_release_channel }} - steps: - - name: Checkout Vector - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - with: - ref: ${{ inputs.git_ref }} - - name: Bootstrap runner environment (Ubuntu-specific) - run: sudo -E bash 
scripts/environment/bootstrap-ubuntu-24.04.sh - - name: Bootstrap runner environment (generic) - run: bash scripts/environment/prepare.sh --modules=rustup,cross - - name: Build Vector - env: - DOCKER_PRIVILEGED: "true" - run: make package-arm-unknown-linux-gnueabi-all - - name: Stage package artifacts for publish - uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0 - with: - name: vector-${{ env.VECTOR_VERSION }}-arm-unknown-linux-gnueabi - path: target/artifacts/vector* - - build-arm-unknown-linux-musleabi-packages: - name: Build Vector for arm-unknown-linux-musleabi (.tar.gz) + build-linux-packages: + name: Build Vector for ${{ matrix.target }} runs-on: release-builder-linux timeout-minutes: 60 needs: generate-publish-metadata @@ -245,6 +56,17 @@ jobs: VECTOR_VERSION: ${{ needs.generate-publish-metadata.outputs.vector_version }} VECTOR_BUILD_DESC: ${{ needs.generate-publish-metadata.outputs.vector_build_desc }} CHANNEL: ${{ needs.generate-publish-metadata.outputs.vector_release_channel }} + strategy: + matrix: + include: + - target: x86_64-unknown-linux-musl # .tar.gz + - target: x86_64-unknown-linux-gnu # .tar.gz, .deb, .rpm + - target: aarch64-unknown-linux-musl # .tar.gz + - target: aarch64-unknown-linux-gnu # .tar.gz, .deb, .rpm + - target: armv7-unknown-linux-gnueabihf # .tar.gz, .deb, .rpm + - target: armv7-unknown-linux-musleabihf # .tar.gz + - target: arm-unknown-linux-gnueabi # .tar.gz, .deb + - target: arm-unknown-linux-musleabi # .tar.gz steps: - name: Checkout Vector uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 @@ -253,15 +75,15 @@ jobs: - name: Bootstrap runner environment (Ubuntu-specific) run: sudo -E bash scripts/environment/bootstrap-ubuntu-24.04.sh - name: Bootstrap runner environment (generic) - run: bash scripts/environment/prepare.sh --modules=rustup,cross + run: bash scripts/environment/prepare.sh --modules=rustup,cross,cargo-deb + - name: Install cross-compilation tools + run: sudo 
apt-get install -y binutils-arm-linux-gnueabihf binutils-aarch64-linux-gnu - name: Build Vector - env: - DOCKER_PRIVILEGED: "true" - run: make package-arm-unknown-linux-musleabi + run: make package-${{ matrix.target }}-all - name: Stage package artifacts for publish - uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0 + uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0 with: - name: vector-${{ env.VECTOR_VERSION }}-arm-unknown-linux-musleabi + name: vector-${{ env.VECTOR_VERSION }}-${{ matrix.target }} path: target/artifacts/vector* build-apple-darwin-packages: @@ -293,10 +115,12 @@ jobs: uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: ref: ${{ inputs.git_ref }} - - name: Bootstrap runner environment (macOS-specific) - run: | - bash scripts/environment/bootstrap-macos.sh - bash scripts/environment/prepare.sh --modules=rustup + + - uses: ./.github/actions/setup + with: + rust: true + rustup: true + protoc: true - name: Build Vector env: TARGET: "${{ matrix.architecture }}-apple-darwin" @@ -305,12 +129,11 @@ jobs: export PATH="$HOME/.cargo/bin:$PATH" make package - name: Stage package artifacts for publish - uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0 + uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0 with: name: vector-${{ env.VECTOR_VERSION }}-${{ matrix.architecture }}-apple-darwin path: target/artifacts/vector* - build-x86_64-pc-windows-msvc-packages: name: Build Vector for x86_64-pc-windows-msvc (.zip) runs-on: windows-2025-8core @@ -353,7 +176,7 @@ jobs: export PATH="/c/wix:$PATH" ./scripts/package-msi.sh - name: Stage package artifacts for publish - uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0 + uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0 with: name: vector-${{ env.VECTOR_VERSION }}-x86_64-pc-windows-msvc path: target/artifacts/vector* 
@@ -364,7 +187,7 @@ jobs: timeout-minutes: 5 needs: - generate-publish-metadata - - build-x86_64-unknown-linux-gnu-packages + - build-linux-packages env: VECTOR_VERSION: ${{ needs.generate-publish-metadata.outputs.vector_version }} DD_PKG_VERSION: "latest" @@ -398,7 +221,7 @@ jobs: with: ref: ${{ inputs.git_ref }} - name: Download staged package artifacts (x86_64-unknown-linux-gnu) - uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0 + uses: actions/download-artifact@70fc10c6e5e1ce46ad2ea6f2b72d43f7d47b13c3 # v8.0.0 with: name: vector-${{ env.VECTOR_VERSION }}-x86_64-unknown-linux-gnu path: target/artifacts @@ -409,10 +232,10 @@ jobs: rpm-verify: name: Verify RPM Packages runs-on: ubuntu-24.04 - timeout-minutes: 5 + timeout-minutes: 10 needs: - generate-publish-metadata - - build-x86_64-unknown-linux-gnu-packages + - build-linux-packages env: VECTOR_VERSION: ${{ needs.generate-publish-metadata.outputs.vector_version }} DD_PKG_VERSION: "latest" @@ -449,7 +272,7 @@ jobs: with: ref: ${{ inputs.git_ref }} - name: Download staged package artifacts (x86_64-unknown-linux-gnu) - uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0 + uses: actions/download-artifact@70fc10c6e5e1ce46ad2ea6f2b72d43f7d47b13c3 # v8.0.0 with: name: vector-${{ env.VECTOR_VERSION }}-x86_64-unknown-linux-gnu path: target/artifacts @@ -478,7 +301,7 @@ jobs: with: ref: ${{ inputs.git_ref }} - name: Download staged package artifacts (${{ matrix.target }}) - uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0 + uses: actions/download-artifact@70fc10c6e5e1ce46ad2ea6f2b72d43f7d47b13c3 # v8.0.0 with: name: vector-${{ env.VECTOR_VERSION }}-${{ matrix.target }} path: target/artifacts @@ -491,16 +314,12 @@ jobs: name: Publish to Docker runs-on: ubuntu-24.04 timeout-minutes: 15 + # Elevated permission required to push Docker images to GitHub Container Registry + permissions: + packages: write needs: - 
generate-publish-metadata - - build-aarch64-unknown-linux-gnu-packages - - build-aarch64-unknown-linux-musl-packages - - build-x86_64-unknown-linux-gnu-packages - - build-x86_64-unknown-linux-musl-packages - - build-armv7-unknown-linux-musleabihf-packages - - build-armv7-unknown-linux-gnueabihf-packages - - build-arm-unknown-linux-gnueabi-packages - - build-arm-unknown-linux-musleabi-packages + - build-linux-packages - deb-verify env: VECTOR_VERSION: ${{ needs.generate-publish-metadata.outputs.vector_version }} @@ -531,47 +350,15 @@ jobs: uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3.12.0 with: version: latest - install: true - - name: Download staged package artifacts (aarch64-unknown-linux-gnu) - uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0 - with: - name: vector-${{ env.VECTOR_VERSION }}-aarch64-unknown-linux-gnu - path: target/artifacts - - name: Download staged package artifacts (aarch64-unknown-linux-musl) - uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0 - with: - name: vector-${{ env.VECTOR_VERSION }}-aarch64-unknown-linux-musl - path: target/artifacts - - name: Download staged package artifacts (x86_64-unknown-linux-gnu) - uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0 - with: - name: vector-${{ env.VECTOR_VERSION }}-x86_64-unknown-linux-gnu - path: target/artifacts - - name: Download staged package artifacts (x86_64-unknown-linux-musl) - uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0 - with: - name: vector-${{ env.VECTOR_VERSION }}-x86_64-unknown-linux-musl - path: target/artifacts - - name: Download staged package artifacts (armv7-unknown-linux-gnueabihf) - uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0 - with: - name: vector-${{ env.VECTOR_VERSION }}-armv7-unknown-linux-gnueabihf - path: target/artifacts - - name: Download staged package artifacts 
(armv7-unknown-linux-musleabihf) - uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0 + - name: Download all Linux package artifacts + uses: actions/download-artifact@70fc10c6e5e1ce46ad2ea6f2b72d43f7d47b13c3 # v8.0.0 with: - name: vector-${{ env.VECTOR_VERSION }}-armv7-unknown-linux-musleabihf + pattern: vector-${{ env.VECTOR_VERSION }}-*-linux-* path: target/artifacts - - name: Download staged package artifacts (arm-unknown-linux-gnueabi) - uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0 + merge-multiple: true + - uses: ./.github/actions/setup with: - name: vector-${{ env.VECTOR_VERSION }}-arm-unknown-linux-gnueabi - path: target/artifacts - - name: Download staged package artifacts (arm-unknown-linux-musleabi) - uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0 - with: - name: vector-${{ env.VECTOR_VERSION }}-arm-unknown-linux-musleabi - path: target/artifacts + vdev: true - name: Build and publish Docker images env: PLATFORM: "linux/amd64,linux/arm64,linux/arm/v7,linux/arm/v6" @@ -589,16 +376,9 @@ jobs: timeout-minutes: 10 needs: - generate-publish-metadata - - build-x86_64-unknown-linux-gnu-packages - - build-x86_64-unknown-linux-musl-packages - - build-aarch64-unknown-linux-musl-packages - - build-aarch64-unknown-linux-gnu-packages + - build-linux-packages - build-apple-darwin-packages - build-x86_64-pc-windows-msvc-packages - - build-armv7-unknown-linux-musleabihf-packages - - build-armv7-unknown-linux-gnueabihf-packages - - build-arm-unknown-linux-gnueabi-packages - - build-arm-unknown-linux-musleabi-packages - deb-verify - rpm-verify - macos-verify @@ -610,56 +390,15 @@ jobs: uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: ref: ${{ inputs.git_ref }} - - name: Download staged package artifacts (aarch64-unknown-linux-gnu) - uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0 - with: - name: vector-${{ 
env.VECTOR_VERSION }}-aarch64-unknown-linux-gnu - path: target/artifacts - - name: Download staged package artifacts (aarch64-unknown-linux-musl) - uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0 - with: - name: vector-${{ env.VECTOR_VERSION }}-aarch64-unknown-linux-musl - path: target/artifacts - - name: Download staged package artifacts (x86_64-unknown-linux-gnu) - uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0 - with: - name: vector-${{ env.VECTOR_VERSION }}-x86_64-unknown-linux-gnu - path: target/artifacts - - name: Download staged package artifacts (x86_64-unknown-linux-musl) - uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0 - with: - name: vector-${{ env.VECTOR_VERSION }}-x86_64-unknown-linux-musl - path: target/artifacts - - name: Download staged package artifacts (arm64-apple-darwin) - uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0 - with: - name: vector-${{ env.VECTOR_VERSION }}-arm64-apple-darwin - path: target/artifacts - - name: Download staged package artifacts (x86_64-pc-windows-msvc) - uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0 - with: - name: vector-${{ env.VECTOR_VERSION }}-x86_64-pc-windows-msvc - path: target/artifacts - - name: Download staged package artifacts (armv7-unknown-linux-gnueabihf) - uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0 - with: - name: vector-${{ env.VECTOR_VERSION }}-armv7-unknown-linux-gnueabihf - path: target/artifacts - - name: Download staged package artifacts (armv7-unknown-linux-musleabihf) - uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0 - with: - name: vector-${{ env.VECTOR_VERSION }}-armv7-unknown-linux-musleabihf - path: target/artifacts - - name: Download staged package artifacts (arm-unknown-linux-gnueabi) - uses: 
actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0 + - uses: ./.github/actions/setup with: - name: vector-${{ env.VECTOR_VERSION }}-arm-unknown-linux-gnueabi - path: target/artifacts - - name: Download staged package artifacts (arm-unknown-linux-musleabi) - uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0 + vdev: true + - name: Download all package artifacts + uses: actions/download-artifact@70fc10c6e5e1ce46ad2ea6f2b72d43f7d47b13c3 # v8.0.0 with: - name: vector-${{ env.VECTOR_VERSION }}-arm-unknown-linux-musleabi + pattern: vector-${{ env.VECTOR_VERSION }}-* path: target/artifacts + merge-multiple: true - name: Publish artifacts to S3 env: AWS_ACCESS_KEY_ID: ${{ secrets.CI_AWS_ACCESS_KEY_ID }} @@ -672,18 +411,14 @@ jobs: if: inputs.channel == 'release' runs-on: ubuntu-24.04 timeout-minutes: 10 + # Elevated permission required to create GitHub releases and upload release assets + permissions: + contents: write needs: - generate-publish-metadata - - build-x86_64-unknown-linux-gnu-packages - - build-x86_64-unknown-linux-musl-packages - - build-aarch64-unknown-linux-musl-packages - - build-aarch64-unknown-linux-gnu-packages + - build-linux-packages - build-apple-darwin-packages - build-x86_64-pc-windows-msvc-packages - - build-armv7-unknown-linux-gnueabihf-packages - - build-armv7-unknown-linux-musleabihf-packages - - build-arm-unknown-linux-gnueabi-packages - - build-arm-unknown-linux-musleabi-packages - deb-verify - rpm-verify - macos-verify @@ -695,61 +430,15 @@ jobs: uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: ref: ${{ inputs.git_ref }} - - name: Download staged package artifacts (aarch64-unknown-linux-gnu) - uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0 - with: - name: vector-${{ env.VECTOR_VERSION }}-aarch64-unknown-linux-gnu - path: target/artifacts - - name: Download staged package artifacts (aarch64-unknown-linux-musl) - uses: 
actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0 - with: - name: vector-${{ env.VECTOR_VERSION }}-aarch64-unknown-linux-musl - path: target/artifacts - - name: Download staged package artifacts (x86_64-unknown-linux-gnu) - uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0 - with: - name: vector-${{ env.VECTOR_VERSION }}-x86_64-unknown-linux-gnu - path: target/artifacts - - name: Download staged package artifacts (x86_64-unknown-linux-musl) - uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0 - with: - name: vector-${{ env.VECTOR_VERSION }}-x86_64-unknown-linux-musl - path: target/artifacts - - name: Download staged package artifacts (arm64-apple-darwin) - uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0 - with: - name: vector-${{ env.VECTOR_VERSION }}-arm64-apple-darwin - path: target/artifacts - - name: Download staged package artifacts (x86_64-pc-windows-msvc) - uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0 - with: - name: vector-${{ env.VECTOR_VERSION }}-x86_64-pc-windows-msvc - path: target/artifacts - - name: Download staged package artifacts (armv7-unknown-linux-gnueabihf) - uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0 - with: - name: vector-${{ env.VECTOR_VERSION }}-armv7-unknown-linux-gnueabihf - path: target/artifacts - - name: Download staged package artifacts (armv7-unknown-linux-musleabihf) - uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0 - with: - name: vector-${{ env.VECTOR_VERSION }}-armv7-unknown-linux-musleabihf - path: target/artifacts - - name: Download artifact checksums - uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0 + - uses: ./.github/actions/setup with: - name: vector-${{ env.VECTOR_VERSION }}-SHA256SUMS - path: target/artifacts - - name: Download staged package artifacts 
(arm-unknown-linux-gnueabi) - uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0 + vdev: true + - name: Download all package artifacts + uses: actions/download-artifact@70fc10c6e5e1ce46ad2ea6f2b72d43f7d47b13c3 # v8.0.0 with: - name: vector-${{ env.VECTOR_VERSION }}-arm-unknown-linux-gnueabi - path: target/artifacts - - name: Download staged package artifacts (arm-unknown-linux-musleabi) - uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0 - with: - name: vector-${{ env.VECTOR_VERSION }}-arm-unknown-linux-musleabi + pattern: vector-${{ env.VECTOR_VERSION }}-* path: target/artifacts + merge-multiple: true - name: Publish release to GitHub env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} @@ -761,16 +450,9 @@ jobs: timeout-minutes: 5 needs: - generate-publish-metadata - - build-x86_64-unknown-linux-gnu-packages - - build-x86_64-unknown-linux-musl-packages - - build-aarch64-unknown-linux-musl-packages - - build-aarch64-unknown-linux-gnu-packages + - build-linux-packages - build-apple-darwin-packages - build-x86_64-pc-windows-msvc-packages - - build-armv7-unknown-linux-gnueabihf-packages - - build-armv7-unknown-linux-musleabihf-packages - - build-arm-unknown-linux-gnueabi-packages - - build-arm-unknown-linux-musleabi-packages env: VECTOR_VERSION: ${{ needs.generate-publish-metadata.outputs.vector_version }} steps: @@ -778,60 +460,16 @@ jobs: uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: ref: ${{ inputs.git_ref }} - - name: Download staged package artifacts (aarch64-unknown-linux-gnu) - uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0 - with: - name: vector-${{ env.VECTOR_VERSION }}-aarch64-unknown-linux-gnu - path: target/artifacts - - name: Download staged package artifacts (aarch64-unknown-linux-musl) - uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0 - with: - name: vector-${{ env.VECTOR_VERSION 
}}-aarch64-unknown-linux-musl - path: target/artifacts - - name: Download staged package artifacts (x86_64-unknown-linux-gnu) - uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0 - with: - name: vector-${{ env.VECTOR_VERSION }}-x86_64-unknown-linux-gnu - path: target/artifacts - - name: Download staged package artifacts (x86_64-unknown-linux-musl) - uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0 - with: - name: vector-${{ env.VECTOR_VERSION }}-x86_64-unknown-linux-musl - path: target/artifacts - - name: Download staged package artifacts (arm64-apple-darwin) - uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0 - with: - name: vector-${{ env.VECTOR_VERSION }}-arm64-apple-darwin - path: target/artifacts - - name: Download staged package artifacts (x86_64-pc-windows-msvc) - uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0 - with: - name: vector-${{ env.VECTOR_VERSION }}-x86_64-pc-windows-msvc - path: target/artifacts - - name: Download staged package artifacts (armv7-unknown-linux-gnueabihf) - uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0 - with: - name: vector-${{ env.VECTOR_VERSION }}-armv7-unknown-linux-gnueabihf - path: target/artifacts - - name: Download staged package artifacts (armv7-unknown-linux-musleabihf) - uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0 - with: - name: vector-${{ env.VECTOR_VERSION }}-armv7-unknown-linux-musleabihf - path: target/artifacts - - name: Download staged package artifacts (arm-unknown-linux-gnueabi) - uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0 - with: - name: vector-${{ env.VECTOR_VERSION }}-arm-unknown-linux-gnueabi - path: target/artifacts - - name: Download staged package artifacts (arm-unknown-linux-musleabi) - uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0 
+ - name: Download all package artifacts + uses: actions/download-artifact@70fc10c6e5e1ce46ad2ea6f2b72d43f7d47b13c3 # v8.0.0 with: - name: vector-${{ env.VECTOR_VERSION }}-arm-unknown-linux-musleabi + pattern: vector-${{ env.VECTOR_VERSION }}-* path: target/artifacts + merge-multiple: true - name: Generate SHA256 checksums for artifacts run: make sha256sum - name: Stage checksum for publish - uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0 + uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0 with: name: vector-${{ env.VECTOR_VERSION }}-SHA256SUMS path: target/artifacts/vector-${{ env.VECTOR_VERSION }}-SHA256SUMS diff --git a/.github/workflows/regression.yml b/.github/workflows/regression.yml index 0c462b27de27a..2621118a9df11 100644 --- a/.github/workflows/regression.yml +++ b/.github/workflows/regression.yml @@ -34,6 +34,10 @@ on: schedule: - cron: '0 7 * * 1' # Runs at 7 AM UTC on Mondays +# Workflow-level permissions - read access to repository contents +permissions: + contents: read # Required to checkout code + env: SINGLE_MACHINE_PERFORMANCE_API: ${{ secrets.SINGLE_MACHINE_PERFORMANCE_API }} SMP_WARMUP_SECONDS: 70 # default is 45 seconds @@ -105,7 +109,7 @@ jobs: - name: Set SMP version id: experimental-meta run: | - export SMP_CRATE_VERSION="0.25.1" + export SMP_CRATE_VERSION="0.26.1" echo "smp crate version: ${SMP_CRATE_VERSION}" echo "SMP_CRATE_VERSION=${SMP_CRATE_VERSION}" >> $GITHUB_OUTPUT @@ -189,6 +193,10 @@ jobs: needs: - should-run-gate - resolve-inputs + # Job-level permissions for artifact upload + permissions: + contents: read # Required to checkout code + actions: write # Required to upload artifacts steps: - uses: colpal/actions-clean@36e6ca1abd35efe61cb60f912bd7837f67887c8a # v1.1.1 @@ -216,7 +224,7 @@ jobs: vector:${{ needs.resolve-inputs.outputs.baseline-tag }} - name: Upload image as artifact - uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0 + uses: 
actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0 with: name: baseline-image path: "${{ runner.temp }}/baseline-image.tar" @@ -228,6 +236,10 @@ jobs: needs: - should-run-gate - resolve-inputs + # Job-level permissions for artifact upload + permissions: + contents: read # Required to checkout code + actions: write # Required to upload artifacts steps: - uses: colpal/actions-clean@36e6ca1abd35efe61cb60f912bd7837f67887c8a # v1.1.1 @@ -255,7 +267,7 @@ jobs: vector:${{ needs.resolve-inputs.outputs.comparison-tag }} - name: Upload image as artifact - uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0 + uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0 with: name: comparison-image path: "${{ runner.temp }}/comparison-image.tar" @@ -294,7 +306,7 @@ jobs: - build-baseline steps: - name: 'Download baseline image' - uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0 + uses: actions/download-artifact@70fc10c6e5e1ce46ad2ea6f2b72d43f7d47b13c3 # v8.0.0 with: name: baseline-image @@ -334,7 +346,7 @@ jobs: - build-comparison steps: - name: 'Download comparison image' - uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0 + uses: actions/download-artifact@70fc10c6e5e1ce46ad2ea6f2b72d43f7d47b13c3 # v8.0.0 with: name: comparison-image @@ -372,6 +384,10 @@ jobs: - resolve-inputs - upload-baseline-image-to-ecr - upload-comparison-image-to-ecr + # Job-level permissions for artifact upload + permissions: + contents: read # Required to checkout code + actions: write # Required to upload artifacts steps: - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: @@ -408,7 +424,7 @@ jobs: --submission-metadata ${{ runner.temp }}/submission-metadata \ --replicas ${{ env.SMP_REPLICAS }} - - uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0 + - uses: 
actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0 with: name: vector-submission-metadata path: ${{ runner.temp }}/submission-metadata @@ -462,7 +478,7 @@ jobs: aws s3 cp s3://smp-cli-releases/v${{ needs.resolve-inputs.outputs.smp-version }}/x86_64-unknown-linux-musl/smp ${{ runner.temp }}/bin/smp - name: Download submission metadata - uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0 + uses: actions/download-artifact@70fc10c6e5e1ce46ad2ea6f2b72d43f7d47b13c3 # v8.0.0 with: name: vector-submission-metadata path: ${{ runner.temp }}/ @@ -484,6 +500,10 @@ jobs: - should-run-gate - submit-job - resolve-inputs + # Job-level permissions for artifact upload + permissions: + contents: read # Required to checkout code + actions: write # Required to upload artifacts steps: - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: @@ -501,7 +521,7 @@ jobs: aws s3 cp s3://smp-cli-releases/v${{ needs.resolve-inputs.outputs.smp-version }}/x86_64-unknown-linux-musl/smp ${{ runner.temp }}/bin/smp - name: Download submission metadata - uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0 + uses: actions/download-artifact@70fc10c6e5e1ce46ad2ea6f2b72d43f7d47b13c3 # v8.0.0 with: name: vector-submission-metadata path: ${{ runner.temp }}/ @@ -523,7 +543,7 @@ jobs: path: ${{ runner.temp }}/outputs/report.md - name: Upload regression report to artifacts - uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0 + uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0 with: name: capture-artifacts path: ${{ runner.temp }}/outputs/* @@ -547,7 +567,7 @@ jobs: steps: - name: Download capture-artifacts continue-on-error: true - uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0 + uses: actions/download-artifact@70fc10c6e5e1ce46ad2ea6f2b72d43f7d47b13c3 # v8.0.0 with: name: capture-artifacts diff --git 
a/.github/workflows/release.yml b/.github/workflows/release.yml index d49db554965e4..e9965f4d6193c 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -1,8 +1,7 @@ name: Release Suite permissions: - contents: write - packages: write + contents: read # Restrictive default on: push: @@ -12,6 +11,9 @@ on: jobs: Release: + permissions: + contents: write # Required to create/update releases and tags + packages: write # Required to push container images to GHCR uses: ./.github/workflows/publish.yml with: git_ref: ${{ github.ref }} diff --git a/.github/workflows/gardener_remove_waiting_author.yml b/.github/workflows/remove_waiting_author.yml similarity index 100% rename from .github/workflows/gardener_remove_waiting_author.yml rename to .github/workflows/remove_waiting_author.yml diff --git a/.github/workflows/scorecard.yml b/.github/workflows/scorecard.yml index 37287fc734701..c43853e5a7d51 100644 --- a/.github/workflows/scorecard.yml +++ b/.github/workflows/scorecard.yml @@ -59,7 +59,7 @@ jobs: # Upload the results as artifacts (optional). Commenting out will disable uploads of run results in SARIF # format to the repository Actions tab. 
- name: "Upload artifact" - uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0 + uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0 with: name: SARIF file path: results.sarif diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 114321ae28242..5572b2d3f8d9d 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -5,6 +5,10 @@ on: merge_group: types: [checks_requested] +# Workflow-level permissions - read access to repository contents +permissions: + contents: read # Required to checkout code + concurrency: # `github.ref` is unique for MQ runs and PRs group: ${{ github.workflow }}-${{ github.ref }} @@ -146,8 +150,8 @@ jobs: markdownlint: true - run: make check-markdown - check-component-docs: - name: Check Component Docs + check-generated-docs: + name: Check Generated Docs runs-on: ubuntu-24.04-8core if: ${{ needs.changes.outputs.source == 'true' || needs.changes.outputs.component_docs == 'true' || needs.changes.outputs.test-yml == 'true' }} needs: changes @@ -159,7 +163,7 @@ jobs: protoc: true cue: true libsasl2: true - - run: make check-component-docs + - run: make check-generated-docs check-rust-docs: name: Check Rust Docs @@ -217,7 +221,7 @@ jobs: - check-licenses - check-docs - check-markdown - - check-component-docs + - check-generated-docs - check-rust-docs - test-vrl - build-vrl-playground diff --git a/.github/workflows/vdev_publish.yml b/.github/workflows/vdev_publish.yml index 155db01ecbbed..edb2f36674731 100644 --- a/.github/workflows/vdev_publish.yml +++ b/.github/workflows/vdev_publish.yml @@ -4,11 +4,13 @@ on: tags: [ "vdev-v*.*.*" ] permissions: - contents: write # needed for creating releases + contents: read # Restrictive default jobs: build: runs-on: ${{ matrix.os }} + permissions: + contents: write # Required to upload release assets strategy: matrix: include: diff --git a/AGENTS.md b/AGENTS.md index de7a4c77b1100..24fd23e790e76 100644 --- 
a/AGENTS.md +++ b/AGENTS.md @@ -45,6 +45,21 @@ reduction and improved data quality for observability infrastructure. - `/docs/` - Developer documentation - `/tests/` - Integration and E2E tests +## Development Workflow + +### Iterative Development Process + +When working on Vector's Rust codebase, follow this iterative development cycle: + +1. Make code changes +2. Run `make check-clippy` to check for linting issues +3. Fix any issues found (use `make clippy-fix` for auto-fixes) +4. Continue to next task or mark current task complete + +Run this cycle after any code modification. + +When editing markdown files (*.md), run `make check-markdown` after changes. + ## Two Different Workflows ### Rust Development (Most Common) @@ -99,7 +114,8 @@ See [Integration Tests](#integration-tests) section below for more details. make fmt # Format code make check-fmt # Verify formatting make check-clippy # Run Clippy linter -make check-component-docs # Check component documentation +make check-markdown # Check markdown files +make check-generated-docs # Check generated documentation ./scripts/check_changelog_fragments.sh # Verify changelog ``` @@ -131,102 +147,6 @@ make cue-build **Note:** Website changes use Hugo, CUE, Tailwind CSS, and TypeScript. See [website/README.md](website/README.md) for details. -## Rust Coding Conventions - -### Import Statements (`use`) - -All `use` statements must be at the **top of the file/module** or at the top of `mod tests`. -This is for consistency. 
- -**Correct:** - -```rust -use std::time::Duration; -use governor::clock; -use crate::config::TransformConfig; - -fn my_function() { - // function code -} -``` - -**Incorrect:** - -```rust -fn my_function() { - use std::time::Duration; // WRONG; Do not insert `use` inside functions - // function code -} -``` - -**Organization:** - -- Group imports: `std` → external crates → internal (`crate::`) -- Use `rustfmt` to automatically organize them: `make fmt` - -### Logging Style - -Always use the [Tracing crate](https://tracing.rs/tracing/)'s key/value style: - -**Correct:** - -```rust -warn!(message = "Failed to merge value.", %error); -info!(message = "Processing batch.", batch_size, internal_log_rate_secs = 1); -``` - -**Incorrect:** - -```rust -warn!("Failed to merge value: {}.", err); // Don't do this -``` - -**Rules:** - -- Events should be capitalized and end with a period -- Use `error` (not `e` or `err`) for error values -- Prefer Display over Debug: `%error` not `?error` -- Key/value pairs provide structured logging - -### String Formatting - -Prefer inline variable syntax in format strings (Rust 1.58+). - -**Correct:** - -```rust -format!("Error: {err}"); -println!("Processing {count} items"); -``` - -**Incorrect:** - -```rust -format!("Error: {}", err); // Unnecessary positional argument -println!("Processing {} items", count); -``` - -**Why:** Inline syntax is more readable and reduces mistakes with argument ordering. - -### Panics - -Code in Vector should **NOT** panic under normal circumstances. 
- -- Panics are only acceptable when assumptions about internal state are violated (indicating a bug) -- All potential panics **MUST** be documented in function documentation -- Prefer `Result` and proper error handling - -### Feature Flags - -New components (sources, sinks, transforms) must be behind feature flags: - -```bash -# Build only specific component for faster iteration -cargo test --lib --no-default-features --features sinks-console sinks::console -``` - -See `features` section in `Cargo.toml` for examples. - ## Common Patterns ### Development Tools @@ -258,21 +178,19 @@ echo "Running pre-push checks..." make check-licenses make check-fmt make check-clippy -make check-component-docs +make check-markdown +make check-generated-docs ./scripts/check_changelog_fragments.sh ``` Then: `chmod +x .git/hooks/pre-push` -### Container Development +## Detailed Documentation -Vector supports development in Docker/Podman containers: - -```bash -ENVIRONMENT=true make -# Example: ENVIRONMENT=true make test -``` +| Topic | Document | +|-------|----------| +| Rust style patterns | [docs/RUST_STYLE.md](docs/RUST_STYLE.md) | ## Architecture Notes @@ -282,7 +200,7 @@ ENVIRONMENT=true make - **Transforms**: Modify, filter, or enrich event data - **Sinks**: Send data to external systems -Component docs are auto-generated from code annotations. Run `make check-component-docs` after changes. +Component docs are auto-generated from code annotations. Run `make check-generated-docs` after changes. ### Integration Tests @@ -319,12 +237,12 @@ Run `make fmt` before committing. Formatting must be exact. Run `make clippy-fix` to auto-fix many issues. Manual fixes may be required. -### Component Docs Out of Sync +### Generated Docs Out of Sync -Component documentation is generated from code. Run: +Documentation is generated from code. 
Run: ```bash -make check-component-docs +make check-generated-docs ``` ### License Check Fails diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 93868085dc574..2cdf612eb1156 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -87,7 +87,7 @@ To merge a new source, sink, or transform, the pull request is required to: - [ ] Add tests, especially integration tests if your contribution connects to an external service. - [ ] Add instrumentation so folks using your integration can get insight into how it's working and performing. You can see some [example of instrumentation in existing integrations](https://github.com/vectordotdev/vector/tree/master/src/internal_events). -- [ ] Add documentation. You can see [examples in the `docs` directory](https://github.com/vectordotdev/vector/blob/master/docs). +- [ ] Add documentation. You need to generate and create documentation files for your component. See the [component documentation guide](docs/DOCUMENTING.md#adding-documentation-for-new-components) for detailed instructions. When adding new integration tests, the following changes are needed in the GitHub Workflows: @@ -141,7 +141,7 @@ echo "Running pre-push checks..." make check-licenses make check-fmt make check-clippy -make check-component-docs +make check-generated-docs # Some other checks that in our experience rarely fail on PRs. make check-deny @@ -292,7 +292,7 @@ cargo vdev check events cargo vdev check licenses # Vector's documentation for each component is generated from the comments attached to the Component structs and members. # Running this ensures that the generated docs are up to date. -make check-component-docs +make check-generated-docs # Generate the code documentation for the Vector project. # Run this to ensure the docs can be generated without errors (warnings are acceptable at the minute). 
cd rust-doc && make docs diff --git a/Cargo.lock b/Cargo.lock index 961a82a267cba..9dab8614065c2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -180,15 +180,6 @@ version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" -[[package]] -name = "ansi_term" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d52a9bb7ec0cf484c551830a7ce27bd20d67eac647e1befb56b0be4ee39a55d2" -dependencies = [ - "winapi", -] - [[package]] name = "anstream" version = "0.6.13" @@ -1871,7 +1862,7 @@ dependencies = [ "bitflags 2.10.0", "cexpr", "clang-sys", - "itertools 0.13.0", + "itertools 0.10.5", "proc-macro2 1.0.106", "quote 1.0.44", "regex", @@ -2225,9 +2216,9 @@ dependencies = [ [[package]] name = "bytesize" -version = "2.1.0" +version = "2.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f5c434ae3cf0089ca203e9019ebe529c47ff45cefe8af7c85ecb734ef541822f" +checksum = "6bd91ee7b2422bcb158d90ef4d14f75ef67f340943fc4149891dcce8f8b972a3" [[package]] name = "cargo-lock" @@ -2526,7 +2517,7 @@ dependencies = [ "chrono", "csv-core", "derivative", - "derive_more 2.0.1", + "derive_more 2.1.1", "dyn-clone", "flate2", "futures 0.3.31", @@ -2798,6 +2789,15 @@ dependencies = [ "unicode-segmentation", ] +[[package]] +name = "convert_case" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "633458d4ef8c78b72454de2d54fd6ab2e60f9e02be22f3c6104cdc8a4e0fceb9" +dependencies = [ + "unicode-segmentation", +] + [[package]] name = "cookie" version = "0.18.1" @@ -3016,11 +3016,11 @@ checksum = "d8b9f2e4c67f833b660cdb0a3523065869fb35570177239812ed4c905aeff87b" dependencies = [ "bitflags 2.10.0", "crossterm_winapi", - "derive_more 2.0.1", + "derive_more 2.1.1", "document-features", "futures-core", "mio", - "parking_lot 0.12.5", + "parking_lot", "rustix 1.0.1", "signal-hook", "signal-hook-mio", 
@@ -3239,7 +3239,7 @@ dependencies = [ "hashbrown 0.14.5", "lock_api", "once_cell", - "parking_lot_core 0.9.12", + "parking_lot_core", ] [[package]] @@ -3263,7 +3263,7 @@ dependencies = [ "cookie", "log", "once_cell", - "parking_lot 0.12.5", + "parking_lot", "percent-encoding", "reqwest 0.12.28", "semver", @@ -3417,22 +3417,23 @@ dependencies = [ [[package]] name = "derive_more" -version = "2.0.1" +version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "093242cf7570c207c83073cf82f79706fe7b8317e98620a47d5be7c3d8497678" +checksum = "d751e9e49156b02b44f9c1815bcb94b984cdcc4396ecc32521c739452808b134" dependencies = [ "derive_more-impl", ] [[package]] name = "derive_more-impl" -version = "2.0.1" +version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bda628edc44c4bb645fbe0f758797143e4e07926f7ebf4e9bdfbd3d2ce621df3" +checksum = "799a97264921d8623a957f6c3b9011f3b5492f557bbb7a5a19b7fa6d06ba8dcb" dependencies = [ - "convert_case 0.7.1", + "convert_case 0.10.0", "proc-macro2 1.0.106", "quote 1.0.44", + "rustc_version", "syn 2.0.117", "unicode-xid 0.2.4", ] @@ -3541,7 +3542,7 @@ dependencies = [ "chrono-tz", "dnsmsg-parser", "hickory-proto 0.25.2", - "paste", + "pastey", "prost 0.12.6", "prost-build 0.12.6", "snafu 0.8.9", @@ -4301,7 +4302,7 @@ checksum = "1d930c203dd0b6ff06e0201a4a2fe9149b43c684fd4420555b26d21b1a02956f" dependencies = [ "futures-core", "lock_api", - "parking_lot 0.12.5", + "parking_lot", ] [[package]] @@ -4501,7 +4502,7 @@ dependencies = [ "getrandom 0.3.4", "hashbrown 0.16.0", "nonzero_ext", - "parking_lot 0.12.5", + "parking_lot", "portable-atomic", "rand 0.9.2", "smallvec", @@ -4592,7 +4593,7 @@ dependencies = [ "futures 0.3.31", "futures-util", "greptime-proto", - "parking_lot 0.12.5", + "parking_lot", "prost 0.12.6", "rand 0.9.2", "snafu 0.8.9", @@ -4990,7 +4991,7 @@ dependencies = [ "ipconfig", "lru-cache", "once_cell", - "parking_lot 0.12.5", + "parking_lot", "rand 0.8.5", 
"resolv-conf", "smallvec", @@ -5155,7 +5156,7 @@ dependencies = [ "httpdate", "itoa", "pin-project-lite", - "socket2 0.5.10", + "socket2 0.4.10", "tokio", "tower-service", "tracing 0.1.44", @@ -5212,7 +5213,7 @@ dependencies = [ "once_cell", "openssl", "openssl-sys", - "parking_lot 0.12.5", + "parking_lot", "tokio", "tokio-openssl", "tower-layer", @@ -5231,7 +5232,7 @@ dependencies = [ "once_cell", "openssl", "openssl-sys", - "parking_lot 0.12.5", + "parking_lot", "pin-project", "tower-layer", "tower-service", @@ -5657,9 +5658,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c" dependencies = [ "cfg-if", - "js-sys", - "wasm-bindgen", - "web-sys", ] [[package]] @@ -5838,9 +5836,9 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.90" +version = "0.3.91" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14dc6f6450b3f6d4ed5b16327f38fed626d375a886159ca555bd7822c0c3a5a6" +checksum = "b49715b7073f385ba4bc528e5747d02e66cb39c6146efb66b781f131f0fb399c" dependencies = [ "once_cell", "wasm-bindgen", @@ -6097,7 +6095,7 @@ dependencies = [ "jsonptr", "k8s-openapi 0.22.0", "kube-client", - "parking_lot 0.12.5", + "parking_lot", "pin-project", "serde", "serde_json", @@ -6152,7 +6150,7 @@ dependencies = [ "flume 0.11.0", "futures-core", "futures-io", - "parking_lot 0.12.5", + "parking_lot", "pinky-swear", "reactor-trait", "serde", @@ -6228,9 +6226,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.180" +version = "0.2.182" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bcc35a38544a891a5f7c865aca548a982ccb3b8650a5b06d0fd33a10283c56fc" +checksum = "6800badb6cb2082ffd7b6a67e6125bb39f18782f793520caee8cb8846be06112" [[package]] name = "libflate" @@ -6631,9 +6629,9 @@ dependencies = [ [[package]] name = "memchr" -version = "2.7.5" +version = "2.8.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0" +checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" [[package]] name = "memmap2" @@ -6763,7 +6761,7 @@ dependencies = [ "mlua-sys", "mlua_derive", "num-traits", - "parking_lot 0.12.5", + "parking_lot", "rustc-hash", "rustversion", ] @@ -6816,7 +6814,7 @@ dependencies = [ "event-listener 5.3.1", "futures-util", "once_cell", - "parking_lot 0.12.5", + "parking_lot", "quanta", "rustc_version", "smallvec", @@ -7085,9 +7083,9 @@ dependencies = [ [[package]] name = "nix" -version = "0.31.1" +version = "0.31.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "225e7cfe711e0ba79a68baeddb2982723e4235247aefce1482f2f16c27865b66" +checksum = "5d6d0705320c1e6ba1d912b5e37cf18071b6c2e9b7fa8215a1e8a7651966f5d3" dependencies = [ "bitflags 2.10.0", "cfg-if", @@ -7746,17 +7744,6 @@ version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bb813b8af86854136c6922af0598d719255ecb2179515e6e7730d468f05c9cae" -[[package]] -name = "parking_lot" -version = "0.11.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d17b78036a60663b797adeaee46f5c9dfebb86948d1255007a1d6be0271ff99" -dependencies = [ - "instant", - "lock_api", - "parking_lot_core 0.8.6", -] - [[package]] name = "parking_lot" version = "0.12.5" @@ -7764,21 +7751,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "93857453250e3077bd71ff98b6a65ea6621a19bb0f559a85248955ac12c45a1a" dependencies = [ "lock_api", - "parking_lot_core 0.9.12", -] - -[[package]] -name = "parking_lot_core" -version = "0.8.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60a2cfe6f0ad2bfc16aefa463b497d5c7a5ecd44a23efa72aa342d90177356dc" -dependencies = [ - "cfg-if", - "instant", - "libc", - "redox_syscall 0.2.16", - "smallvec", - "winapi", + 
"parking_lot_core", ] [[package]] @@ -7812,6 +7785,12 @@ version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" +[[package]] +name = "pastey" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b867cad97c0791bbd3aaa6472142568c6c9e8f71937e98379f584cfb0cf35bec" + [[package]] name = "pbkdf2" version = "0.11.0" @@ -7955,18 +7934,18 @@ dependencies = [ [[package]] name = "pin-project" -version = "1.1.10" +version = "1.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "677f1add503faace112b9f1373e43e9e054bfdd22ff1a63c1bc485eaec6a6a8a" +checksum = "f1749c7ed4bcaf4c3d0a3efc28538844fb29bcdd7d2b67b2be7e20ba861ff517" dependencies = [ "pin-project-internal", ] [[package]] name = "pin-project-internal" -version = "1.1.10" +version = "1.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e918e4ff8c4549eb882f14b3a4bc8c8bc93de829416eacf579f1207a8fbf861" +checksum = "d9b20ed30f105399776b9c883e68e536ef602a16ae6f596d2c473591d6ad64c6" dependencies = [ "proc-macro2 1.0.106", "quote 1.0.44", @@ -7993,7 +7972,7 @@ checksum = "d894b67aa7a4bf295db5e85349078c604edaa6fa5c8721e8eca3c7729a27f2ac" dependencies = [ "doc-comment", "flume 0.10.14", - "parking_lot 0.12.5", + "parking_lot", "tracing 0.1.44", ] @@ -8460,7 +8439,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "be769465445e8c1474e9c5dac2018218498557af32d9ed057325ec9a41ae81bf" dependencies = [ "heck 0.5.0", - "itertools 0.14.0", + "itertools 0.10.5", "log", "multimap", "once_cell", @@ -8506,7 +8485,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8a56d757972c98b346a9b766e3f02746cde6dd1cd1d1d563472929fdd74bec4d" dependencies = [ "anyhow", - "itertools 0.14.0", + "itertools 0.10.5", "proc-macro2 1.0.106", "quote 1.0.44", "syn 2.0.117", @@ -8677,6 +8656,16 @@ 
dependencies = [ "uuid", ] +[[package]] +name = "quick-xml" +version = "0.31.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1004a344b30a54e2ee58d66a71b32d2db2feb0a31f9a2d302bf0536f15de2a33" +dependencies = [ + "memchr", + "serde", +] + [[package]] name = "quick-xml" version = "0.37.4" @@ -9152,7 +9141,7 @@ dependencies = [ "fluent-uri 0.4.1", "getrandom 0.3.4", "hashbrown 0.16.0", - "parking_lot 0.12.5", + "parking_lot", "percent-encoding", "serde_json", ] @@ -9218,6 +9207,15 @@ version = "1.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ba39f3699c378cd8970968dcbff9c43159ea4cfbd88d43c00b22f2ef10a435d2" +[[package]] +name = "relative-path" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bca40a312222d8ba74837cb474edef44b37f561da5f773981007a10bbaa992b0" +dependencies = [ + "serde", +] + [[package]] name = "rend" version = "0.4.1" @@ -9340,9 +9338,9 @@ dependencies = [ [[package]] name = "reqwest-retry" -version = "0.7.0" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "29c73e4195a6bfbcb174b790d9b3407ab90646976c55de58a6515da25d851178" +checksum = "105747e3a037fe5bf17458d794de91149e575b6183fc72c85623a44abb9683f5" dependencies = [ "anyhow", "async-trait", @@ -9350,13 +9348,12 @@ dependencies = [ "getrandom 0.2.15", "http 1.3.1", "hyper 1.7.0", - "parking_lot 0.11.2", "reqwest 0.12.28", "reqwest-middleware", "retry-policies", - "thiserror 1.0.68", + "thiserror 2.0.17", "tokio", - "wasm-timer", + "wasmtimer", ] [[package]] @@ -9367,11 +9364,11 @@ checksum = "6b3789b30bd25ba102de4beabd95d21ac45b69b1be7d14522bab988c526d6799" [[package]] name = "retry-policies" -version = "0.4.0" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5875471e6cab2871bc150ecb8c727db5113c9338cc3354dc5ee3425b6aa40a1c" +checksum = 
"46a4bd6027df676bcb752d3724db0ea3c0c5fc1dd0376fec51ac7dcaf9cc69be" dependencies = [ - "rand 0.8.5", + "rand 0.9.2", ] [[package]] @@ -9527,7 +9524,7 @@ dependencies = [ "proc-macro2 1.0.106", "quote 1.0.44", "regex", - "relative-path", + "relative-path 1.9.3", "rustc_version", "syn 2.0.117", "unicode-ident", @@ -10177,6 +10174,19 @@ dependencies = [ "unsafe-libyaml", ] +[[package]] +name = "serde_yaml_ng" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b4db627b98b36d4203a7b458cf3573730f2bb591b28871d916dfa9efabfd41f" +dependencies = [ + "indexmap 2.12.0", + "itoa", + "ryu", + "serde", + "unsafe-libyaml", +] + [[package]] name = "serial_test" version = "3.2.0" @@ -10186,7 +10196,7 @@ dependencies = [ "futures 0.3.31", "log", "once_cell", - "parking_lot 0.12.5", + "parking_lot", "scc", "serial_test_derive", ] @@ -10775,7 +10785,7 @@ checksum = "f91138e76242f575eb1d3b38b4f1362f10d3a43f47d182a5b359af488a02293b" dependencies = [ "new_debug_unreachable", "once_cell", - "parking_lot 0.12.5", + "parking_lot", "phf_shared 0.10.0", "precomputed-hash", ] @@ -10963,7 +10973,7 @@ dependencies = [ "ntapi 0.4.1", "objc2-core-foundation", "objc2-io-kit", - "windows", + "windows 0.60.0", ] [[package]] @@ -11303,7 +11313,7 @@ dependencies = [ "bytes 1.11.1", "libc", "mio", - "parking_lot 0.12.5", + "parking_lot", "pin-project-lite", "signal-hook-registry", "socket2 0.6.0", @@ -11378,7 +11388,7 @@ dependencies = [ "futures-channel", "futures-util", "log", - "parking_lot 0.12.5", + "parking_lot", "percent-encoding", "phf 0.13.1", "pin-project-lite", @@ -12430,7 +12440,7 @@ checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" [[package]] name = "vdev" -version = "0.2.0" +version = "0.3.0" dependencies = [ "anyhow", "cfg-if", @@ -12448,7 +12458,7 @@ dependencies = [ "itertools 0.14.0", "log", "owo-colors", - "paste", + "pastey", "regex", "reqwest 0.11.26", "semver", @@ -12459,11 +12469,13 @@ dependencies = [ 
"tempfile", "toml 0.9.8", "toml_edit 0.23.9", + "vector-vrl-functions", + "vrl", ] [[package]] name = "vector" -version = "0.54.0" +version = "0.55.0" dependencies = [ "apache-avro 0.16.0", "approx", @@ -12577,7 +12589,7 @@ dependencies = [ "netlink-packet-sock-diag", "netlink-packet-utils", "netlink-sys", - "nix 0.31.1", + "nix 0.31.2", "nkeys", "nom 8.0.0", "notify", @@ -12586,7 +12598,7 @@ dependencies = [ "openssl-probe", "openssl-src", "ordered-float 4.6.0", - "paste", + "pastey", "percent-encoding", "pin-project", "postgres-openssl", @@ -12598,6 +12610,7 @@ dependencies = [ "prost-types 0.12.6", "pulsar", "quick-junit", + "quick-xml 0.31.0", "quickcheck", "rand 0.9.2", "rand_distr", @@ -12667,6 +12680,7 @@ dependencies = [ "vector-vrl-metrics", "vrl", "warp", + "windows 0.58.0", "windows-service", "wiremock", "zstd 0.13.2", @@ -12716,7 +12730,7 @@ dependencies = [ "metrics-util", "num-traits", "ordered-float 4.6.0", - "paste", + "pastey", "proptest", "quickcheck", "rand 0.9.2", @@ -12748,7 +12762,7 @@ dependencies = [ "indexmap 2.12.0", "itertools 0.14.0", "metrics", - "paste", + "pastey", "pin-project", "serde", "serde_json", @@ -12859,7 +12873,7 @@ dependencies = [ "noisy_float", "openssl", "ordered-float 4.6.0", - "parking_lot 0.12.5", + "parking_lot", "pin-project", "proptest", "prost 0.12.6", @@ -13090,12 +13104,11 @@ checksum = "6a02e4885ed3bc0f2de90ea6dd45ebcbb66dacffe03547fadbb0eeae2770887d" [[package]] name = "vrl" -version = "0.30.0" -source = "git+https://github.com/vectordotdev/vrl.git?branch=main#bacda767c055ff37865ec96fe168b31b344252d1" +version = "0.31.0" +source = "git+https://github.com/vectordotdev/vrl.git?branch=main#d21e53192e6a3ed3d38ef6f32886f1cbf30d308c" dependencies = [ "aes", "aes-siv", - "ansi_term", "arbitrary", "base16", "base62", @@ -13145,6 +13158,7 @@ dependencies = [ "mlua", "nom 8.0.0", "nom-language", + "nu-ansi-term", "ofb", "onig", "ordered-float 4.6.0", @@ -13166,6 +13180,7 @@ dependencies = [ "quoted_printable", "rand 
0.8.5", "regex", + "relative-path 2.0.1", "reqwest 0.12.28", "reqwest-middleware", "reqwest-retry", @@ -13176,6 +13191,7 @@ dependencies = [ "serde", "serde_json", "serde_yaml", + "serde_yaml_ng", "sha-1", "sha2", "sha3", @@ -13301,9 +13317,9 @@ checksum = "b8dad83b4f25e74f184f64c43b150b91efe7647395b42289f38e50566d82855b" [[package]] name = "wasm-bindgen" -version = "0.2.113" +version = "0.2.114" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60722a937f594b7fde9adb894d7c092fc1bb6612897c46368d18e7a20208eff2" +checksum = "6532f9a5c1ece3798cb1c2cfdba640b9b3ba884f5db45973a6f442510a87d38e" dependencies = [ "cfg-if", "once_cell", @@ -13326,9 +13342,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.113" +version = "0.2.114" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fac8c6395094b6b91c4af293f4c79371c163f9a6f56184d2c9a85f5a95f3950" +checksum = "18a2d50fcf105fb33bb15f00e7a77b772945a2ee45dcf454961fd843e74c18e6" dependencies = [ "quote 1.0.44", "wasm-bindgen-macro-support", @@ -13336,9 +13352,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.113" +version = "0.2.114" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab3fabce6159dc20728033842636887e4877688ae94382766e00b180abac9d60" +checksum = "03ce4caeaac547cdf713d280eda22a730824dd11e6b8c3ca9e42247b25c631e3" dependencies = [ "bumpalo", "proc-macro2 1.0.106", @@ -13349,9 +13365,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-shared" -version = "0.2.113" +version = "0.2.114" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "de0e091bdb824da87dc01d967388880d017a0a9bc4f3bdc0d86ee9f9336e3bb5" +checksum = "75a326b8c223ee17883a4251907455a2431acc2791c98c26279376490c378c16" dependencies = [ "unicode-ident", ] @@ -13370,25 +13386,24 @@ dependencies = [ ] [[package]] -name = "wasm-timer" -version = "0.2.5" +name = "wasmtimer" +version = "0.4.3" 
source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be0ecb0db480561e9a7642b5d3e4187c128914e58aa84330b9493e3eb68c5e7f" +checksum = "1c598d6b99ea013e35844697fc4670d08339d5cda15588f193c6beedd12f644b" dependencies = [ "futures 0.3.31", "js-sys", - "parking_lot 0.11.2", + "parking_lot", "pin-utils", + "slab", "wasm-bindgen", - "wasm-bindgen-futures", - "web-sys", ] [[package]] name = "web-sys" -version = "0.3.90" +version = "0.3.91" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "705eceb4ce901230f8625bd1d665128056ccbe4b7408faa625eec1ba80f59a97" +checksum = "854ba17bb104abfb26ba36da9729addc7ce7f06f5c0f90f3c391f8461cca21f9" dependencies = [ "js-sys", "wasm-bindgen", @@ -13524,6 +13539,16 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" +[[package]] +name = "windows" +version = "0.58.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd04d41d93c4992d421894c18c8b43496aa748dd4c081bac0dc93eb0489272b6" +dependencies = [ + "windows-core 0.58.0", + "windows-targets 0.52.6", +] + [[package]] name = "windows" version = "0.60.0" @@ -13555,17 +13580,30 @@ dependencies = [ "windows-targets 0.52.6", ] +[[package]] +name = "windows-core" +version = "0.58.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ba6d44ec8c2591c134257ce647b7ea6b20335bf6379a27dac5f1641fcf59f99" +dependencies = [ + "windows-implement 0.58.0", + "windows-interface 0.58.0", + "windows-result 0.2.0", + "windows-strings 0.1.0", + "windows-targets 0.52.6", +] + [[package]] name = "windows-core" version = "0.60.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ca21a92a9cae9bf4ccae5cf8368dce0837100ddf6e6d57936749e85f152f6247" dependencies = [ - "windows-implement", - "windows-interface", + "windows-implement 0.59.0", + "windows-interface 0.59.1", "windows-link 
0.1.0", - "windows-result", - "windows-strings", + "windows-result 0.3.1", + "windows-strings 0.3.1", ] [[package]] @@ -13578,6 +13616,17 @@ dependencies = [ "windows-link 0.1.0", ] +[[package]] +name = "windows-implement" +version = "0.58.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2bbd5b46c938e506ecbce286b6628a02171d56153ba733b6c741fc627ec9579b" +dependencies = [ + "proc-macro2 1.0.106", + "quote 1.0.44", + "syn 2.0.117", +] + [[package]] name = "windows-implement" version = "0.59.0" @@ -13589,6 +13638,17 @@ dependencies = [ "syn 2.0.117", ] +[[package]] +name = "windows-interface" +version = "0.58.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "053c4c462dc91d3b1504c6fe5a726dd15e216ba718e84a0e46a88fbe5ded3515" +dependencies = [ + "proc-macro2 1.0.106", + "quote 1.0.44", + "syn 2.0.117", +] + [[package]] name = "windows-interface" version = "0.59.1" @@ -13629,8 +13689,17 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6c44a98275e31bfd112bb06ba96c8ab13c03383a3753fdddd715406a1824c7e0" dependencies = [ "windows-link 0.1.0", - "windows-result", - "windows-strings", + "windows-result 0.3.1", + "windows-strings 0.3.1", +] + +[[package]] +name = "windows-result" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d1043d8214f791817bab27572aaa8af63732e11bf84aa21a45a78d6c317ae0e" +dependencies = [ + "windows-targets 0.52.6", ] [[package]] @@ -13653,6 +13722,16 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "windows-strings" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4cd9b125c486025df0eabcb585e62173c6c9eddcec5d117d3b6e8c30e2ee4d10" +dependencies = [ + "windows-result 0.2.0", + "windows-targets 0.52.6", +] + [[package]] name = "windows-strings" version = "0.3.1" diff --git a/Cargo.toml b/Cargo.toml index 56e3376b29f02..be6007cd740d6 100644 --- a/Cargo.toml 
+++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "vector" -version = "0.54.0" +version = "0.55.0" authors = ["Vector Contributors "] edition = "2024" description = "A lightweight and ultra-fast tool for building observability pipelines" @@ -108,9 +108,7 @@ members = [ ".", "lib/codecs", "lib/dnsmsg-parser", - "lib/dnstap-parser", "lib/docs-renderer", - "lib/enrichment", "lib/fakedata", "lib/file-source", "lib/file-source-common", @@ -135,10 +133,12 @@ members = [ "lib/vector-top", "lib/vector-vrl/category", "lib/vector-vrl/cli", + "lib/vector-vrl/dnstap-parser", + "lib/vector-vrl/enrichment", "lib/vector-vrl/functions", + "lib/vector-vrl/metrics", "lib/vector-vrl/tests", "lib/vector-vrl/web-playground", - "lib/vector-vrl-metrics", "vdev", ] @@ -177,8 +177,8 @@ metrics-tracing-context = { version = "0.17.0", default-features = false } metrics-util = { version = "0.18.0", default-features = false, features = ["registry"] } nom = { version = "8.0.0", default-features = false } ordered-float = { version = "4.6.0", default-features = false } -paste = { version = "1.0.15" } -pin-project = { version = "1.1.10", default-features = false } +pastey = { version = "0.2", default-features = false } +pin-project = { version = "1.1.11", default-features = false } proptest = { version = "1.10" } proptest-derive = { version = "0.6.0" } prost = { version = "0.12", default-features = false, features = ["std"] } @@ -192,7 +192,7 @@ reqwest = { version = "0.11", features = ["json"] } rust_decimal = { version = "1.37.0", default-features = false, features = ["std"] } semver = { version = "1.0.27", default-features = false, features = ["serde", "std"] } serde = { version = "1.0.219", default-features = false, features = ["alloc", "derive", "rc"] } -serde_json = { version = "1.0.143", default-features = false, features = ["raw_value", "std"] } +serde_json = { version = "1.0.143", default-features = false, features = ["preserve_order", "raw_value", "std"] } serde_yaml = { version = 
"0.9.34", default-features = false } snafu = { version = "0.8.9", default-features = false, features = ["futures", "std"] } socket2 = { version = "0.5.10", default-features = false } @@ -214,8 +214,8 @@ vector-config-macros = { path = "lib/vector-config-macros" } vector-common-macros = { path = "lib/vector-common-macros" } vector-lib = { path = "lib/vector-lib", default-features = false, features = ["vrl"] } vector-vrl-category = { path = "lib/vector-vrl/category" } -vector-vrl-functions = { path = "lib/vector-vrl/functions" } -vrl = { git = "https://github.com/vectordotdev/vrl.git", branch = "main", features = ["arbitrary", "cli", "test", "test_framework"] } +vector-vrl-functions = { path = "lib/vector-vrl/functions", default-features = false } +vrl = { git = "https://github.com/vectordotdev/vrl.git", branch = "main", default-features = false, features = ["arbitrary", "cli", "test", "test_framework", "stdlib-base"] } mock_instant = { version = "0.6" } serial_test = { version = "3.2" } @@ -226,7 +226,7 @@ reqwest_12 = { package = "reqwest", version = "0.12", features = ["json"] } clap.workspace = true clap_complete.workspace = true indoc.workspace = true -paste.workspace = true +pastey.workspace = true pin-project.workspace = true proptest = { workspace = true, optional = true } proptest-derive = { workspace = true, optional = true } @@ -237,7 +237,7 @@ vrl.workspace = true # Internal libs dnsmsg-parser = { path = "lib/dnsmsg-parser", optional = true } -dnstap-parser = { path = "lib/dnstap-parser", optional = true } +dnstap-parser = { path = "lib/vector-vrl/dnstap-parser", optional = true } fakedata = { path = "lib/fakedata", optional = true } tracing-limit = { path = "lib/tracing-limit" } vector-common = { path = "lib/vector-common", default-features = false } @@ -246,7 +246,7 @@ vector-config.workspace = true vector-config-common.workspace = true vector-config-macros.workspace = true vector-vrl-functions = { workspace = true, features = ["vrl-metrics"] } 
-vector-vrl-metrics = { path = "lib/vector-vrl-metrics" } +vector-vrl-metrics = { path = "lib/vector-vrl/metrics" } loki-logproto = { path = "lib/loki-logproto", optional = true } # Tokio / Futures @@ -454,6 +454,8 @@ byteorder = "1.5.0" [target.'cfg(windows)'.dependencies] windows-service = "0.8.0" +windows = { version = "0.58", features = ["Win32_System_EventLog", "Win32_Foundation", "Win32_System_Com", "Win32_Security", "Win32_Security_Authorization", "Win32_System_Threading", "Win32_Storage_FileSystem"], optional = true } +quick-xml = { version = "0.31", default-features = false, features = ["serialize"], optional = true } [target.'cfg(unix)'.dependencies] nix = { version = "0.31", default-features = false, features = ["socket", "signal", "fs"] } @@ -510,7 +512,7 @@ docs = ["enable-unix"] default-cmake = ["enable-unix", "rdkafka?/gssapi-vendored", "rdkafka?/cmake_build"] # Default features for *-pc-windows-msvc # TODO: Enable SASL https://github.com/vectordotdev/vector/pull/3081#issuecomment-659298042 -base = ["api", "enrichment-tables", "sinks", "sources", "transforms", "secrets"] +base = ["api", "enrichment-tables", "sinks", "sources", "transforms", "secrets", "vrl/stdlib"] enable-api-client = ["base", "api-client"] enable-unix = ["enable-api-client", "sources-dnstap", "unix"] @@ -520,6 +522,11 @@ default-no-api-client = ["base", "sources-dnstap", "unix", "rdkafka?/gssapi-vend tokio-console = ["dep:console-subscriber", "tokio/tracing"] +# VRL functions control features +vrl-functions-env = ["vrl/enable_env_functions"] +vrl-functions-system = ["vrl/enable_system_functions"] +vrl-functions-network = ["vrl/enable_network_functions"] + # Enables the binary secret-backend-example secret-backend-example = ["transforms"] @@ -641,6 +648,7 @@ sources-logs = [ "sources-syslog", "sources-vector", "sources-websocket", + "sources-windows_event_log", ] sources-metrics = [ "dep:prost", @@ -727,6 +735,8 @@ sources-utils-net-tcp = ["listenfd", "dep:ipnet"] 
sources-utils-net-udp = ["listenfd"] sources-utils-net-unix = [] sources-websocket = ["dep:tokio-tungstenite"] +sources-windows_event_log = ["dep:windows", "dep:quick-xml", "dep:governor"] +sources-windows_event_log-integration-tests = ["sources-windows_event_log"] sources-vector = ["dep:prost", "dep:tonic", "protobuf-build"] @@ -956,6 +966,7 @@ all-integration-tests = [ "splunk-integration-tests", "vector-api-tests", "webhdfs-integration-tests", + "windows-event-log-integration-tests", ] amqp-integration-tests = ["sources-amqp", "sinks-amqp"] @@ -1023,9 +1034,10 @@ redis-integration-tests = ["sinks-redis", "sources-redis"] splunk-integration-tests = ["sinks-splunk_hec"] dnstap-integration-tests = ["sources-dnstap", "dep:bollard"] webhdfs-integration-tests = ["sinks-webhdfs"] +windows-event-log-integration-tests = ["sources-windows_event_log-integration-tests"] disable-resolv-conf = [] shutdown-tests = ["api", "sinks-blackhole", "sinks-console", "sinks-prometheus", "sources", "transforms-lua", "transforms-remap", "unix"] -cli-tests = ["sinks-blackhole", "sinks-socket", "sources-demo_logs", "sources-file"] +cli-tests = ["sinks-blackhole", "sinks-socket", "sources-demo_logs", "sources-file", "transforms-remap"] test-utils = ["vector-lib/test"] # End-to-End testing-related features diff --git a/LICENSE-3rdparty.csv b/LICENSE-3rdparty.csv index 87c2380cfc159..bcb65c4243a6e 100644 --- a/LICENSE-3rdparty.csv +++ b/LICENSE-3rdparty.csv @@ -15,7 +15,6 @@ amq-protocol-tcp,https://github.com/amqp-rs/amq-protocol,BSD-2-Clause,Marc-Antoi amq-protocol-types,https://github.com/amqp-rs/amq-protocol,BSD-2-Clause,Marc-Antoine Perennou <%arc-Antoine@Perennou.com> amq-protocol-uri,https://github.com/amqp-rs/amq-protocol,BSD-2-Clause,Marc-Antoine Perennou <%arc-Antoine@Perennou.com> android_system_properties,https://github.com/nical/android_system_properties,MIT OR Apache-2.0,Nicolas Silva -ansi_term,https://github.com/ogham/rust-ansi-term,MIT,"ogham@bsago.me, Ryan Scheel (Havvy) , 
Josh Triplett " anstream,https://github.com/rust-cli/anstyle,MIT OR Apache-2.0,The anstream Authors anstyle,https://github.com/rust-cli/anstyle,MIT OR Apache-2.0,The anstyle Authors anstyle-parse,https://github.com/rust-cli/anstyle,MIT OR Apache-2.0,The anstyle-parse Authors @@ -539,13 +538,12 @@ p256,https://github.com/RustCrypto/elliptic-curves/tree/master/p256,Apache-2.0 O p384,https://github.com/RustCrypto/elliptic-curves/tree/master/p384,Apache-2.0 OR MIT,"RustCrypto Developers, Frank Denis " pad,https://github.com/ogham/rust-pad,MIT,Ben S parking,https://github.com/smol-rs/parking,Apache-2.0 OR MIT,"Stjepan Glavina , The Rust Project Developers" -parking_lot,https://github.com/Amanieu/parking_lot,Apache-2.0 OR MIT,Amanieu d'Antras parking_lot,https://github.com/Amanieu/parking_lot,MIT OR Apache-2.0,Amanieu d'Antras -parking_lot_core,https://github.com/Amanieu/parking_lot,Apache-2.0 OR MIT,Amanieu d'Antras parking_lot_core,https://github.com/Amanieu/parking_lot,MIT OR Apache-2.0,Amanieu d'Antras parse-size,https://github.com/kennytm/parse-size,MIT,kennytm passt,https://github.com/kevingimbel/passt,MIT OR Apache-2.0,Kevin Gimbel paste,https://github.com/dtolnay/paste,MIT OR Apache-2.0,David Tolnay +pastey,https://github.com/as1100k/pastey,MIT OR Apache-2.0,"Aditya Kumar , David Tolnay " pbkdf2,https://github.com/RustCrypto/password-hashes/tree/master/pbkdf2,MIT OR Apache-2.0,RustCrypto Developers peeking_take_while,https://github.com/fitzgen/peeking_take_while,MIT OR Apache-2.0,Nick Fitzgerald pem,https://github.com/jcreekmore/pem-rs,MIT,Jonathan Creekmore @@ -635,6 +633,7 @@ regex-automata,https://github.com/rust-lang/regex/tree/master/regex-automata,MIT regex-filtered,https://github.com/ua-parser/uap-rust,BSD-3-Clause,The regex-filtered Authors regex-lite,https://github.com/rust-lang/regex,MIT OR Apache-2.0,"The Rust Project Developers, Andrew Gallant " regex-syntax,https://github.com/rust-lang/regex/tree/master/regex-syntax,MIT OR Apache-2.0,"The Rust 
Project Developers, Andrew Gallant " +relative-path,https://github.com/udoprog/relative-path,MIT OR Apache-2.0,John-John Tedro rend,https://github.com/djkoloski/rend,MIT,David Koloski reqwest,https://github.com/seanmonstar/reqwest,MIT OR Apache-2.0,Sean McArthur reqwest-middleware,https://github.com/TrueLayer/reqwest-middleware,MIT OR Apache-2.0,Rodrigo Gryzinski @@ -699,6 +698,7 @@ serde_urlencoded,https://github.com/nox/serde_urlencoded,MIT OR Apache-2.0,Antho serde_with,https://github.com/jonasbb/serde_with,MIT OR Apache-2.0,"Jonas Bushart, Marcin Kaźmierczak" serde_with_macros,https://github.com/jonasbb/serde_with,MIT OR Apache-2.0,Jonas Bushart serde_yaml,https://github.com/dtolnay/serde-yaml,MIT OR Apache-2.0,David Tolnay +serde_yaml_ng,https://github.com/acatton/serde-yaml-ng,MIT,Antoine Catton sha-1,https://github.com/RustCrypto/hashes,MIT OR Apache-2.0,RustCrypto Developers sha1,https://github.com/RustCrypto/hashes,MIT OR Apache-2.0,RustCrypto Developers sha2,https://github.com/RustCrypto/hashes,MIT OR Apache-2.0,RustCrypto Developers @@ -864,7 +864,7 @@ wasm-bindgen-macro,https://github.com/wasm-bindgen/wasm-bindgen/tree/master/crat wasm-bindgen-macro-support,https://github.com/wasm-bindgen/wasm-bindgen/tree/master/crates/macro-support,MIT OR Apache-2.0,The wasm-bindgen Developers wasm-bindgen-shared,https://github.com/wasm-bindgen/wasm-bindgen/tree/master/crates/shared,MIT OR Apache-2.0,The wasm-bindgen Developers wasm-streams,https://github.com/MattiasBuelens/wasm-streams,MIT OR Apache-2.0,Mattias Buelens -wasm-timer,https://github.com/tomaka/wasm-timer,MIT,Pierre Krieger +wasmtimer,https://github.com/whizsid/wasmtimer-rs,MIT,"WhizSid , Pierre Krieger " web-sys,https://github.com/wasm-bindgen/wasm-bindgen/tree/master/crates/web-sys,MIT OR Apache-2.0,The wasm-bindgen Developers web-time,https://github.com/daxpedda/web-time,MIT OR Apache-2.0,The web-time Authors webbrowser,https://github.com/amodm/webbrowser-rs,MIT OR Apache-2.0,Amod Malviya @amodm diff 
--git a/Makefile b/Makefile index f87b9b445e998..c6e5d2423820c 100644 --- a/Makefile +++ b/Makefile @@ -373,7 +373,7 @@ test-behavior-config: ## Runs configuration related behavioral tests .PHONY: test-behavior-% test-behavior-%: ## Runs behavioral test for a given category - ${MAYBE_ENVIRONMENT_EXEC} cargo run --no-default-features --features transforms -- test tests/behavior/$*/* + ${MAYBE_ENVIRONMENT_EXEC} cargo run --no-default-features --features transforms,vrl-functions-env,vrl-functions-system,vrl-functions-network -- test tests/behavior/$*/* .PHONY: test-behavior test-behavior: ## Runs all behavioral tests @@ -389,6 +389,14 @@ test-integration: test-integration-nginx test-integration-opentelemetry test-int test-integration: test-integration-redis test-integration-splunk test-integration-dnstap test-integration-datadog-agent test-integration-datadog-logs test-integration-e2e-datadog-logs test-integration-e2e-opentelemetry-logs test-integration: test-integration-datadog-traces test-integration-shutdown +.PHONY: test-integration-windows-event-log +test-integration-windows-event-log: ## Runs Windows Event Log integration tests (Windows only) +ifeq ($(OS),Windows_NT) + ${MAYBE_ENVIRONMENT_EXEC} cargo test -p vector --no-default-features --features sources-windows_event_log-integration-tests windows_event_log::integration_tests +else + @echo "Skipping windows-event-log integration tests (Windows only)" +endif + test-integration-%-cleanup: $(VDEV) --verbose integration stop $* @@ -472,7 +480,7 @@ check: ## Run prerequisite code checks check-all: ## Check everything check-all: check-fmt check-clippy check-docs check-all: check-examples check-component-features -check-all: check-scripts check-deny check-component-docs check-licenses +check-all: check-scripts check-deny check-generated-docs check-licenses .PHONY: check-component-features check-component-features: ## Check that all component features are setup properly @@ -483,7 +491,7 @@ check-clippy: ## Check code 
with Clippy ${MAYBE_ENVIRONMENT_EXEC} $(VDEV) check rust .PHONY: check-docs -check-docs: ## Check that all /docs file are valid +check-docs: generate-vrl-docs ## Check that all /docs file are valid - vrl docs due to remap.functions.* references ${MAYBE_ENVIRONMENT_EXEC} $(VDEV) check docs .PHONY: check-fmt @@ -514,9 +522,9 @@ check-deny: ## Check advisories licenses and sources for crate dependencies check-events: ## Check that events satisfy patterns set in https://github.com/vectordotdev/vector/blob/master/rfcs/2020-03-17-2064-event-driven-observability.md ${MAYBE_ENVIRONMENT_EXEC} $(VDEV) check events -.PHONY: check-component-docs -check-component-docs: generate-component-docs ## Checks that the machine-generated component Cue docs are up-to-date. - ${MAYBE_ENVIRONMENT_EXEC} $(VDEV) check component-docs +.PHONY: check-generated-docs +check-generated-docs: generate-docs ## Checks that the machine-generated component Cue docs are up-to-date. + ${MAYBE_ENVIRONMENT_EXEC} $(VDEV) check generated-docs ##@ Rustdoc build-rustdoc: ## Build Vector's Rustdocs @@ -540,22 +548,28 @@ package: build ## Build the Vector archive ${MAYBE_ENVIRONMENT_EXEC} $(VDEV) package archive .PHONY: package-x86_64-unknown-linux-gnu-all -package-x86_64-unknown-linux-gnu-all: package-x86_64-unknown-linux-gnu package-deb-x86_64-unknown-linux-gnu package-rpm-x86_64-unknown-linux-gnu # Build all x86_64 GNU packages +package-x86_64-unknown-linux-gnu-all: package-x86_64-unknown-linux-gnu package-deb-x86_64-unknown-linux-gnu package-rpm-x86_64-unknown-linux-gnu # .tar.gz, .deb, .rpm .PHONY: package-x86_64-unknown-linux-musl-all -package-x86_64-unknown-linux-musl-all: package-x86_64-unknown-linux-musl # Build all x86_64 MUSL packages +package-x86_64-unknown-linux-musl-all: package-x86_64-unknown-linux-musl # .tar.gz .PHONY: package-aarch64-unknown-linux-musl-all -package-aarch64-unknown-linux-musl-all: package-aarch64-unknown-linux-musl # Build all aarch64 MUSL packages 
+package-aarch64-unknown-linux-musl-all: package-aarch64-unknown-linux-musl # .tar.gz .PHONY: package-aarch64-unknown-linux-gnu-all -package-aarch64-unknown-linux-gnu-all: package-aarch64-unknown-linux-gnu package-deb-aarch64 package-rpm-aarch64 # Build all aarch64 GNU packages +package-aarch64-unknown-linux-gnu-all: package-aarch64-unknown-linux-gnu package-deb-aarch64 package-rpm-aarch64 # .tar.gz, .deb, .rpm .PHONY: package-armv7-unknown-linux-gnueabihf-all -package-armv7-unknown-linux-gnueabihf-all: package-armv7-unknown-linux-gnueabihf package-deb-armv7-gnu package-rpm-armv7hl-gnu # Build all armv7-unknown-linux-gnueabihf MUSL packages +package-armv7-unknown-linux-gnueabihf-all: package-armv7-unknown-linux-gnueabihf package-deb-armv7-gnu package-rpm-armv7hl-gnu # .tar.gz, .deb, .rpm + +.PHONY: package-armv7-unknown-linux-musleabihf-all +package-armv7-unknown-linux-musleabihf-all: package-armv7-unknown-linux-musleabihf # .tar.gz .PHONY: package-arm-unknown-linux-gnueabi-all -package-arm-unknown-linux-gnueabi-all: package-arm-unknown-linux-gnueabi package-deb-arm-gnu # Build all arm-unknown-linux-gnueabihf GNU packages +package-arm-unknown-linux-gnueabi-all: package-arm-unknown-linux-gnueabi package-deb-arm-gnu # .tar.gz, .deb + +.PHONY: package-arm-unknown-linux-musleabi-all +package-arm-unknown-linux-musleabi-all: package-arm-unknown-linux-musleabi # .tar.gz .PHONY: package-x86_64-unknown-linux-gnu package-x86_64-unknown-linux-gnu: target/artifacts/vector-${VERSION}-x86_64-unknown-linux-gnu.tar.gz ## Build an archive suitable for the `x86_64-unknown-linux-gnu` triple. 
@@ -593,41 +607,41 @@ package-arm-unknown-linux-musleabi: target/artifacts/vector-${VERSION}-arm-unkno .PHONY: package-deb-x86_64-unknown-linux-gnu package-deb-x86_64-unknown-linux-gnu: package-x86_64-unknown-linux-gnu ## Build the x86_64 GNU deb package - $(CONTAINER_TOOL) run -v $(PWD):/git/vectordotdev/vector/ -e TARGET=x86_64-unknown-linux-gnu -e VECTOR_VERSION $(ENVIRONMENT_UPSTREAM) cargo vdev package deb + TARGET=x86_64-unknown-linux-gnu $(VDEV) package deb .PHONY: package-deb-x86_64-unknown-linux-musl package-deb-x86_64-unknown-linux-musl: package-x86_64-unknown-linux-musl ## Build the x86_64 GNU deb package - $(CONTAINER_TOOL) run -v $(PWD):/git/vectordotdev/vector/ -e TARGET=x86_64-unknown-linux-musl -e VECTOR_VERSION $(ENVIRONMENT_UPSTREAM) cargo vdev package deb + TARGET=x86_64-unknown-linux-musl $(VDEV) package deb .PHONY: package-deb-aarch64 package-deb-aarch64: package-aarch64-unknown-linux-gnu ## Build the aarch64 deb package - $(CONTAINER_TOOL) run -v $(PWD):/git/vectordotdev/vector/ -e TARGET=aarch64-unknown-linux-gnu -e VECTOR_VERSION $(ENVIRONMENT_UPSTREAM) cargo vdev package deb + TARGET=aarch64-unknown-linux-gnu $(VDEV) package deb .PHONY: package-deb-armv7-gnu package-deb-armv7-gnu: package-armv7-unknown-linux-gnueabihf ## Build the armv7-unknown-linux-gnueabihf deb package - $(CONTAINER_TOOL) run -v $(PWD):/git/vectordotdev/vector/ -e TARGET=armv7-unknown-linux-gnueabihf -e VECTOR_VERSION $(ENVIRONMENT_UPSTREAM) cargo vdev package deb + TARGET=armv7-unknown-linux-gnueabihf $(VDEV) package deb .PHONY: package-deb-arm-gnu package-deb-arm-gnu: package-arm-unknown-linux-gnueabi ## Build the arm-unknown-linux-gnueabi deb package - $(CONTAINER_TOOL) run -v $(PWD):/git/vectordotdev/vector/ -e TARGET=arm-unknown-linux-gnueabi -e VECTOR_VERSION $(ENVIRONMENT_UPSTREAM) cargo vdev package deb + TARGET=arm-unknown-linux-gnueabi $(VDEV) package deb # rpms .PHONY: package-rpm-x86_64-unknown-linux-gnu package-rpm-x86_64-unknown-linux-gnu: 
package-x86_64-unknown-linux-gnu ## Build the x86_64 rpm package - $(CONTAINER_TOOL) run -v $(PWD):/git/vectordotdev/vector/ -e TARGET=x86_64-unknown-linux-gnu -e VECTOR_VERSION $(ENVIRONMENT_UPSTREAM) cargo vdev package rpm + TARGET=x86_64-unknown-linux-gnu $(VDEV) package rpm .PHONY: package-rpm-x86_64-unknown-linux-musl package-rpm-x86_64-unknown-linux-musl: package-x86_64-unknown-linux-musl ## Build the x86_64 musl rpm package - $(CONTAINER_TOOL) run -v $(PWD):/git/vectordotdev/vector/ -e TARGET=x86_64-unknown-linux-musl -e VECTOR_VERSION $(ENVIRONMENT_UPSTREAM) cargo vdev package rpm + TARGET=x86_64-unknown-linux-musl $(VDEV) package rpm .PHONY: package-rpm-aarch64 package-rpm-aarch64: package-aarch64-unknown-linux-gnu ## Build the aarch64 rpm package - $(CONTAINER_TOOL) run -v $(PWD):/git/vectordotdev/vector/ -e TARGET=aarch64-unknown-linux-gnu -e VECTOR_VERSION $(ENVIRONMENT_UPSTREAM) cargo vdev package rpm + TARGET=aarch64-unknown-linux-gnu $(VDEV) package rpm .PHONY: package-rpm-armv7hl-gnu package-rpm-armv7hl-gnu: package-armv7-unknown-linux-gnueabihf ## Build the armv7hl-unknown-linux-gnueabihf rpm package - $(CONTAINER_TOOL) run -v $(PWD):/git/vectordotdev/vector/ -e TARGET=armv7-unknown-linux-gnueabihf -e ARCH=armv7hl -e VECTOR_VERSION $(ENVIRONMENT_UPSTREAM) cargo vdev package rpm + TARGET=armv7-unknown-linux-gnueabihf ARCH=armv7hl $(VDEV) package rpm ##@ Releasing @@ -694,6 +708,18 @@ generate-component-docs: ## Generate per-component Cue docs from the configurati $(if $(findstring true,$(CI)),>/dev/null,) ./scripts/cue.sh fmt +.PHONY: generate-vector-vrl-docs +generate-vector-vrl-docs: ## Generate VRL function documentation from Rust source. + ${MAYBE_ENVIRONMENT_EXEC} $(VDEV) build vector-vrl-docs --output docs/generated/ \ + $(if $(findstring true,$(CI)),>/dev/null,) + +.PHONY: generate-vrl-docs +generate-vrl-docs: ## Generate combined VRL function documentation for the website. 
+ $(MAKE) -C website generate-vrl-docs + +.PHONY: generate-docs +generate-docs: generate-component-docs generate-vector-vrl-docs generate-vrl-docs + .PHONY: signoff signoff: ## Signsoff all previous commits since branch creation scripts/signoff.sh diff --git a/buf.yaml b/buf.yaml index 892768db97363..319ed08506963 100644 --- a/buf.yaml +++ b/buf.yaml @@ -3,7 +3,7 @@ modules: - path: lib/vector-core/proto - path: proto/third-party - path: proto/vector - - path: lib/dnstap-parser/proto + - path: lib/vector-vrl/dnstap-parser/proto lint: use: - DEFAULT diff --git a/changelog.d/#####_limited-buffer-close-race.fix.md b/changelog.d/#####_limited-buffer-close-race.fix.md deleted file mode 100644 index 2006646cfa740..0000000000000 --- a/changelog.d/#####_limited-buffer-close-race.fix.md +++ /dev/null @@ -1,4 +0,0 @@ -Fixed a hard-to-trigger race between closing a memory buffer and outstanding -sends that could rarely cause a lost event array at shutdown. - -authors: bruceg diff --git a/changelog.d/22912_add_azure_logs_ingestion.feature.md b/changelog.d/22912_add_azure_logs_ingestion.feature.md deleted file mode 100644 index 38cc6a8c169e4..0000000000000 --- a/changelog.d/22912_add_azure_logs_ingestion.feature.md +++ /dev/null @@ -1,5 +0,0 @@ -Add support for the Azure Monitor Logs Ingestion API through a new `azure_logs_ingestion` sink. - -The `azure_monitor_logs` sink is now deprecated, and current users will need to migrate to `azure_logs_ingestion` before Microsoft end support for the old Data Collector API (currently scheduled for September 2026). - -authors: jlaundry diff --git a/changelog.d/23329_remove_patch_dependencies.fix.md b/changelog.d/23329_remove_patch_dependencies.fix.md deleted file mode 100644 index bbe05ca936856..0000000000000 --- a/changelog.d/23329_remove_patch_dependencies.fix.md +++ /dev/null @@ -1,3 +0,0 @@ -Remove the `tokio-util` patch override and preserve recoverable decoding behavior via `DecoderFramedRead`. 
- -authors: Trighap52 diff --git a/changelog.d/24074_clickhouse_arrow_complex_types.enhancement.md b/changelog.d/24074_clickhouse_arrow_complex_types.enhancement.md deleted file mode 100644 index 5a5d6c65ec212..0000000000000 --- a/changelog.d/24074_clickhouse_arrow_complex_types.enhancement.md +++ /dev/null @@ -1,3 +0,0 @@ -The `clickhouse` sink now supports complex data types (Array, Map, and Tuple) when using the `arrow_stream` format. - -authors: benjamin-awd diff --git a/changelog.d/24355_vector_top_controls.feature.md b/changelog.d/24355_vector_top_controls.feature.md deleted file mode 100644 index 386fde3ab00d7..0000000000000 --- a/changelog.d/24355_vector_top_controls.feature.md +++ /dev/null @@ -1,3 +0,0 @@ -Added new keybinds to `vector top` for scrolling, sorting and filtering. You can now press `?` when using `vector top` to see all available keybinds. - -authors: esensar Quad9DNS diff --git a/changelog.d/24393_log_to_metric_histogram_summary.fix.md b/changelog.d/24393_log_to_metric_histogram_summary.fix.md deleted file mode 100644 index 1dbe834ec9a70..0000000000000 --- a/changelog.d/24393_log_to_metric_histogram_summary.fix.md +++ /dev/null @@ -1,3 +0,0 @@ -The `log_to_metric` transform now correctly handles aggregated histogram and aggregated summary metrics. - -authors: jblazquez diff --git a/changelog.d/24455_otel_source_per_signal_decoding.enhancement.md b/changelog.d/24455_otel_source_per_signal_decoding.enhancement.md new file mode 100644 index 0000000000000..79db8934df463 --- /dev/null +++ b/changelog.d/24455_otel_source_per_signal_decoding.enhancement.md @@ -0,0 +1,21 @@ +The `opentelemetry` source now supports independent configuration of OTLP decoding for logs, metrics, and traces. This allows more granular +control over which signal types are decoded, while maintaining backward compatibility with the existing boolean configuration. 
+ +## Simple boolean form (applies to all signals) + +```yaml +use_otlp_decoding: true # All signals preserve OTLP format +# or +use_otlp_decoding: false # All signals use Vector native format (default) +``` + +## Per-signal configuration + +```yaml +use_otlp_decoding: + logs: false # Convert to Vector native format + metrics: false # Convert to Vector native format + traces: true # Preserve OTLP format +``` + +authors: pront diff --git a/changelog.d/24603_prometheus_remote_write_healthcheck_uri.enhancement.md b/changelog.d/24603_prometheus_remote_write_healthcheck_uri.enhancement.md deleted file mode 100644 index d49a0c461040e..0000000000000 --- a/changelog.d/24603_prometheus_remote_write_healthcheck_uri.enhancement.md +++ /dev/null @@ -1,3 +0,0 @@ -The `prometheus_remote_write` sink now supports the `healthcheck.uri` field to customize the healthcheck endpoint. - -authors: simonhammes diff --git a/changelog.d/24650_buffer-utilization-metrics.fix.md b/changelog.d/24650_buffer-utilization-metrics.fix.md deleted file mode 100644 index b23c46e09dccd..0000000000000 --- a/changelog.d/24650_buffer-utilization-metrics.fix.md +++ /dev/null @@ -1,4 +0,0 @@ -Fixed recording of buffer utilization metrics to properly record on both send -and receive in order to reflect the actual level and not just the "full" level. - -authors: bruceg diff --git a/changelog.d/24667_clickhouse_arrow_default_columns.fix.md b/changelog.d/24667_clickhouse_arrow_default_columns.fix.md deleted file mode 100644 index c9a506a8ce312..0000000000000 --- a/changelog.d/24667_clickhouse_arrow_default_columns.fix.md +++ /dev/null @@ -1,3 +0,0 @@ -The ClickHouse sink's ArrowStream format now correctly handles MATERIALIZED, ALIAS, EPHEMERAL, and DEFAULT columns. MATERIALIZED, ALIAS, and EPHEMERAL columns are excluded from the fetched schema since they cannot receive INSERT data. DEFAULT columns are kept but marked nullable so events are not rejected when the server-computed value is omitted. 
- -authors: benjamin-awd diff --git a/changelog.d/7538_filesystem_inode_metrics.feature.md b/changelog.d/7538_filesystem_inode_metrics.feature.md deleted file mode 100644 index c43ae8d680ba3..0000000000000 --- a/changelog.d/7538_filesystem_inode_metrics.feature.md +++ /dev/null @@ -1,3 +0,0 @@ -Added inode metrics to the `host_metrics` source filesystem collector on unix systems. The `filesystem_inodes_total`, `filesystem_inodes_free`, `filesystem_inodes_used`, and `filesystem_inodes_used_ratio` metrics are now available. - -authors: mushrowan diff --git a/changelog.d/arrow_encoder_replace_serde_arrow.enhancement.md b/changelog.d/arrow_encoder_replace_serde_arrow.enhancement.md deleted file mode 100644 index b0cd73c35c89b..0000000000000 --- a/changelog.d/arrow_encoder_replace_serde_arrow.enhancement.md +++ /dev/null @@ -1,3 +0,0 @@ -The `arrow_stream` codec now uses `arrow-json` instead of `serde_arrow` for Arrow encoding. - -authors: benjamin-awd diff --git a/changelog.d/azure_blob_connect_proxy.feature.md b/changelog.d/azure_blob_connect_proxy.feature.md deleted file mode 100644 index ccc915e798bcc..0000000000000 --- a/changelog.d/azure_blob_connect_proxy.feature.md +++ /dev/null @@ -1,3 +0,0 @@ -The `azure_blob` sink now supports routing requests through HTTP/HTTPS proxies, enabling uploads from restricted networks that require an outbound proxy. - -authors: joshuacoughlan diff --git a/changelog.d/component_latency-metrics.enhancement.md b/changelog.d/component_latency-metrics.enhancement.md deleted file mode 100644 index 844a8867a6099..0000000000000 --- a/changelog.d/component_latency-metrics.enhancement.md +++ /dev/null @@ -1,5 +0,0 @@ -Added the `component_latency_seconds` histogram and -`component_latency_mean_seconds` gauge internal metrics, exposing the time an -event spends in a single transform including the transform buffer. 
- -authors: bruceg diff --git a/changelog.d/datadog-logs-zstd.enhancement.md b/changelog.d/datadog-logs-zstd.enhancement.md deleted file mode 100644 index 573853bb8366f..0000000000000 --- a/changelog.d/datadog-logs-zstd.enhancement.md +++ /dev/null @@ -1,6 +0,0 @@ -The `datadog_logs` sink now defaults to `zstd` compression instead of no compression. This results in -better network efficiency and higher throughput. You can explicitly set `compression = "none"` to -restore the previous behavior of no compression, or set `compression = "gzip"` if you were previously -using gzip compression explicitly. - -authors: jszwedko pront diff --git a/changelog.d/gcp_cloud_storage_content_encoding.enhancement.md b/changelog.d/gcp_cloud_storage_content_encoding.enhancement.md deleted file mode 100644 index 36100dc87362b..0000000000000 --- a/changelog.d/gcp_cloud_storage_content_encoding.enhancement.md +++ /dev/null @@ -1,3 +0,0 @@ -Add `content_encoding` and `cache_control` options to the `gcp_cloud_storage` sink. `content_encoding` overrides the `Content-Encoding` header (defaults to the compression scheme's content encoding). `cache_control` sets the `Cache-Control` header for created objects. - -authors: benjamin-awd diff --git a/changelog.d/optimize-websocket-source.enhancement.md b/changelog.d/optimize-websocket-source.enhancement.md deleted file mode 100644 index 9745ed1ebc329..0000000000000 --- a/changelog.d/optimize-websocket-source.enhancement.md +++ /dev/null @@ -1,3 +0,0 @@ -Small optimization to the `websocket` source performance by avoiding getting a new time for every event in an array. 
- -authors: bruceg diff --git a/changelog.d/prometheus_remote_write_custom_headers.enhancement.md b/changelog.d/prometheus_remote_write_custom_headers.enhancement.md deleted file mode 100644 index ae02c955b9886..0000000000000 --- a/changelog.d/prometheus_remote_write_custom_headers.enhancement.md +++ /dev/null @@ -1,3 +0,0 @@ -The `prometheus_remote_write` sink now supports custom HTTP headers via the `request.headers` configuration option. This allows users to add custom headers to outgoing requests, which is useful for authentication, routing, or other integration requirements with Prometheus-compatible backends. - -authors: elohmeier diff --git a/changelog.d/remove-no-vrl-cli-flag.breaking.md b/changelog.d/remove-no-vrl-cli-flag.breaking.md deleted file mode 100644 index c1d99f1863265..0000000000000 --- a/changelog.d/remove-no-vrl-cli-flag.breaking.md +++ /dev/null @@ -1,5 +0,0 @@ -Removed the misleadingly-named `default-no-vrl-cli` feature flag, which did not control VRL CLI compilation. -This flag was equivalent to `default` without `api-client` and `enrichment-tables`. -Use `default-no-api-client` as a replacement (note: this includes `enrichment-tables`) or define custom features as needed. - -authors: thomasqueirozb diff --git a/changelog.d/tag_cardinality_limit_metric_labels.enhancement.md b/changelog.d/tag_cardinality_limit_metric_labels.enhancement.md deleted file mode 100644 index b6bf201e8f7e2..0000000000000 --- a/changelog.d/tag_cardinality_limit_metric_labels.enhancement.md +++ /dev/null @@ -1,4 +0,0 @@ -# Added `internal_metrics.include_extended_tags` configuration option - -Added `internal_metrics` configuration section to the `tag_cardinality_limit` transform to better organize internal metrics configuration. 
The `internal_metrics.include_extended_tags` option controls whether to include extended tags (`metric_name`, `tag_key`) in the `tag_value_limit_exceeded_total` metric to help identify which specific metrics and tag keys are hitting the configured value limit. This option defaults to `false` because these tags have potentially unbounded cardinality. -authors: kaarolch diff --git a/changelog.d/time-weighted-buffer-utilization-mean.breaking.md b/changelog.d/time-weighted-buffer-utilization-mean.breaking.md deleted file mode 100644 index 24e8d0a048cf8..0000000000000 --- a/changelog.d/time-weighted-buffer-utilization-mean.breaking.md +++ /dev/null @@ -1,9 +0,0 @@ -The `*buffer_utilization_mean` metrics have been enhanced to use time-weighted -averaging which make them more representative of the actual buffer utilization -over time. - -This change is breaking due to the replacement of the existing -`buffer_utilization_ewma_alpha` config option with -`buffer_utilization_ewma_half_life_seconds`. - -authors: bruceg diff --git a/changelog.d/windows_event_log_source.feature.md b/changelog.d/windows_event_log_source.feature.md new file mode 100644 index 0000000000000..7f64f115a3391 --- /dev/null +++ b/changelog.d/windows_event_log_source.feature.md @@ -0,0 +1,3 @@ +Added a new `windows_event_log` source that collects logs from Windows Event Log channels using the native Windows Event Log API with pull-mode subscriptions, bookmark-based checkpointing, and configurable field filtering. + +authors: tot19 diff --git a/deny.toml b/deny.toml index 32a8862fd30ec..1da31eaa106c9 100644 --- a/deny.toml +++ b/deny.toml @@ -43,11 +43,10 @@ ignore = [ # There is not fix available yet. 
# https://github.com/vectordotdev/vector/issues/19262 "RUSTSEC-2023-0071", - { id = "RUSTSEC-2021-0139", reason = " ansi_term is unmaintained" }, { id = "RUSTSEC-2024-0388", reason = "derivative is unmaintained" }, { id = "RUSTSEC-2024-0384", reason = "instant is unmaintained" }, { id = "RUSTSEC-2020-0168", reason = "mach is unmaintained" }, - { id = "RUSTSEC-2024-0436", reason = "paste is unmaintained" }, + { id = "RUSTSEC-2024-0436", reason = "paste is unmaintained, still a transitive dep via netlink-packet-utils and rmp" }, { id = "RUSTSEC-2025-0012", reason = "backoff is unmaintained" }, # rustls-pemfile is unmaintained. Blocked by both async-nats and http 1.0.0 upgrade. { id = "RUSTSEC-2025-0134", reason = "rustls-pemfile is unmaintained" }, diff --git a/distribution/docker/alpine/Dockerfile b/distribution/docker/alpine/Dockerfile index 9d40daff5ce8e..426254abffd39 100644 --- a/distribution/docker/alpine/Dockerfile +++ b/distribution/docker/alpine/Dockerfile @@ -1,4 +1,4 @@ -FROM docker.io/alpine:3.23 AS builder +FROM docker.io/alpine:3.23@sha256:25109184c71bdad752c8312a8623239686a9a2071e8825f20acb8f2198c3f659 AS builder WORKDIR /vector @@ -12,7 +12,7 @@ RUN ARCH=$(if [ "$TARGETPLATFORM" = "linux/arm/v6" ]; then echo "arm"; else cat RUN mkdir -p /var/lib/vector -FROM docker.io/alpine:3.23 +FROM docker.io/alpine:3.23@sha256:25109184c71bdad752c8312a8623239686a9a2071e8825f20acb8f2198c3f659 # https://github.com/opencontainers/image-spec/blob/main/annotations.md LABEL org.opencontainers.image.url="https://vector.dev" diff --git a/distribution/docker/debian/Dockerfile b/distribution/docker/debian/Dockerfile index 64b46426397cd..095989978231a 100644 --- a/distribution/docker/debian/Dockerfile +++ b/distribution/docker/debian/Dockerfile @@ -1,4 +1,4 @@ -FROM docker.io/debian:trixie-slim AS builder +FROM docker.io/debian:trixie-slim@sha256:1d3c811171a08a5adaa4a163fbafd96b61b87aa871bbc7aa15431ac275d3d430 AS builder WORKDIR /vector @@ -7,7 +7,7 @@ RUN dpkg -i 
vector_*_"$(dpkg --print-architecture)".deb RUN mkdir -p /var/lib/vector -FROM docker.io/debian:trixie-slim +FROM docker.io/debian:trixie-slim@sha256:1d3c811171a08a5adaa4a163fbafd96b61b87aa871bbc7aa15431ac275d3d430 # https://github.com/opencontainers/image-spec/blob/main/annotations.md LABEL org.opencontainers.image.url="https://vector.dev" diff --git a/distribution/docker/distroless-libc/Dockerfile b/distribution/docker/distroless-libc/Dockerfile index 7cff6f487d5b4..2e3988b7403fe 100644 --- a/distribution/docker/distroless-libc/Dockerfile +++ b/distribution/docker/distroless-libc/Dockerfile @@ -1,4 +1,4 @@ -FROM docker.io/debian:trixie-slim AS builder +FROM docker.io/debian:trixie-slim@sha256:1d3c811171a08a5adaa4a163fbafd96b61b87aa871bbc7aa15431ac275d3d430 AS builder WORKDIR /vector @@ -9,7 +9,7 @@ RUN mkdir -p /var/lib/vector # distroless doesn't use static tags # hadolint ignore=DL3007 -FROM gcr.io/distroless/cc-debian12:latest +FROM gcr.io/distroless/cc-debian12:latest@sha256:329e54034ce498f9c6b345044e8f530c6691f99e94a92446f68c0adf9baa8464 # https://github.com/opencontainers/image-spec/blob/main/annotations.md LABEL org.opencontainers.image.url="https://vector.dev" diff --git a/distribution/docker/distroless-static/Dockerfile b/distribution/docker/distroless-static/Dockerfile index d692f48321614..9518be0c4492c 100644 --- a/distribution/docker/distroless-static/Dockerfile +++ b/distribution/docker/distroless-static/Dockerfile @@ -1,4 +1,4 @@ -FROM docker.io/alpine:3.23 AS builder +FROM docker.io/alpine:3.23@sha256:25109184c71bdad752c8312a8623239686a9a2071e8825f20acb8f2198c3f659 AS builder WORKDIR /vector @@ -9,7 +9,7 @@ RUN mkdir -p /var/lib/vector # distroless doesn't use static tags # hadolint ignore=DL3007 -FROM gcr.io/distroless/static:latest +FROM gcr.io/distroless/static:latest@sha256:28efbe90d0b2f2a3ee465cc5b44f3f2cf5533514cf4d51447a977a5dc8e526d0 # https://github.com/opencontainers/image-spec/blob/main/annotations.md LABEL 
org.opencontainers.image.url="https://vector.dev" diff --git a/distribution/install.sh b/distribution/install.sh index 39591daba2945..ceeb50c253862 100755 --- a/distribution/install.sh +++ b/distribution/install.sh @@ -13,7 +13,7 @@ set -u # If PACKAGE_ROOT is unset or empty, default it. PACKAGE_ROOT="${PACKAGE_ROOT:-"https://packages.timber.io/vector"}" # If VECTOR_VERSION is unset or empty, default it. -VECTOR_VERSION="${VECTOR_VERSION:-"0.53.0"}" +VECTOR_VERSION="${VECTOR_VERSION:-"0.54.0"}" _divider="--------------------------------------------------------------------------------" _prompt=">>>" _indent=" " diff --git a/distribution/kubernetes/vector-agent/README.md b/distribution/kubernetes/vector-agent/README.md index a3724e12329c9..dfe0f05deb137 100644 --- a/distribution/kubernetes/vector-agent/README.md +++ b/distribution/kubernetes/vector-agent/README.md @@ -1,6 +1,6 @@ The kubernetes manifests found in this directory have been automatically generated from the [helm chart `vector/vector`](https://github.com/vectordotdev/helm-charts/tree/master/charts/vector) -version 0.50.0 with the following `values.yaml`: +version 0.51.0 with the following `values.yaml`: ```yaml role: Agent diff --git a/distribution/kubernetes/vector-agent/configmap.yaml b/distribution/kubernetes/vector-agent/configmap.yaml index 822f9a2f46b09..e3d7499257236 100644 --- a/distribution/kubernetes/vector-agent/configmap.yaml +++ b/distribution/kubernetes/vector-agent/configmap.yaml @@ -9,7 +9,7 @@ metadata: app.kubernetes.io/name: vector app.kubernetes.io/instance: vector app.kubernetes.io/component: Agent - app.kubernetes.io/version: "0.53.0-distroless-libc" + app.kubernetes.io/version: "0.54.0-distroless-libc" data: agent.yaml: | data_dir: /vector-data-dir diff --git a/distribution/kubernetes/vector-agent/daemonset.yaml b/distribution/kubernetes/vector-agent/daemonset.yaml index 4269320475e5e..4c468b88989a7 100644 --- a/distribution/kubernetes/vector-agent/daemonset.yaml +++ 
b/distribution/kubernetes/vector-agent/daemonset.yaml @@ -9,7 +9,7 @@ metadata: app.kubernetes.io/name: vector app.kubernetes.io/instance: vector app.kubernetes.io/component: Agent - app.kubernetes.io/version: "0.53.0-distroless-libc" + app.kubernetes.io/version: "0.54.0-distroless-libc" spec: selector: matchLabels: @@ -30,7 +30,7 @@ spec: dnsPolicy: ClusterFirst containers: - name: vector - image: "timberio/vector:0.53.0-distroless-libc" + image: "docker.io/timberio/vector:0.54.0-distroless-libc" imagePullPolicy: IfNotPresent args: - --config-dir diff --git a/distribution/kubernetes/vector-agent/rbac.yaml b/distribution/kubernetes/vector-agent/rbac.yaml index c2a122a73d05b..93b151826e238 100644 --- a/distribution/kubernetes/vector-agent/rbac.yaml +++ b/distribution/kubernetes/vector-agent/rbac.yaml @@ -10,7 +10,7 @@ metadata: app.kubernetes.io/name: vector app.kubernetes.io/instance: vector app.kubernetes.io/component: Agent - app.kubernetes.io/version: "0.53.0-distroless-libc" + app.kubernetes.io/version: "0.54.0-distroless-libc" rules: - apiGroups: - "" @@ -31,7 +31,7 @@ metadata: app.kubernetes.io/name: vector app.kubernetes.io/instance: vector app.kubernetes.io/component: Agent - app.kubernetes.io/version: "0.53.0-distroless-libc" + app.kubernetes.io/version: "0.54.0-distroless-libc" roleRef: apiGroup: rbac.authorization.k8s.io kind: ClusterRole diff --git a/distribution/kubernetes/vector-agent/service-headless.yaml b/distribution/kubernetes/vector-agent/service-headless.yaml index 50ffc5628ee46..2f3a72b2619fd 100644 --- a/distribution/kubernetes/vector-agent/service-headless.yaml +++ b/distribution/kubernetes/vector-agent/service-headless.yaml @@ -9,7 +9,7 @@ metadata: app.kubernetes.io/name: vector app.kubernetes.io/instance: vector app.kubernetes.io/component: Agent - app.kubernetes.io/version: "0.53.0-distroless-libc" + app.kubernetes.io/version: "0.54.0-distroless-libc" annotations: spec: clusterIP: None diff --git 
a/distribution/kubernetes/vector-agent/serviceaccount.yaml b/distribution/kubernetes/vector-agent/serviceaccount.yaml index 2814a485ca4e2..9db1dca796f94 100644 --- a/distribution/kubernetes/vector-agent/serviceaccount.yaml +++ b/distribution/kubernetes/vector-agent/serviceaccount.yaml @@ -9,5 +9,5 @@ metadata: app.kubernetes.io/name: vector app.kubernetes.io/instance: vector app.kubernetes.io/component: Agent - app.kubernetes.io/version: "0.53.0-distroless-libc" + app.kubernetes.io/version: "0.54.0-distroless-libc" automountServiceAccountToken: true diff --git a/distribution/kubernetes/vector-aggregator/README.md b/distribution/kubernetes/vector-aggregator/README.md index 505fc2138c0df..213cd06790f2e 100644 --- a/distribution/kubernetes/vector-aggregator/README.md +++ b/distribution/kubernetes/vector-aggregator/README.md @@ -1,6 +1,6 @@ The kubernetes manifests found in this directory have been automatically generated from the [helm chart `vector/vector`](https://github.com/vectordotdev/helm-charts/tree/master/charts/vector) -version 0.50.0 with the following `values.yaml`: +version 0.51.0 with the following `values.yaml`: ```yaml diff --git a/distribution/kubernetes/vector-aggregator/configmap.yaml b/distribution/kubernetes/vector-aggregator/configmap.yaml index a6a1f82010b58..39fa1093bffa6 100644 --- a/distribution/kubernetes/vector-aggregator/configmap.yaml +++ b/distribution/kubernetes/vector-aggregator/configmap.yaml @@ -9,7 +9,7 @@ metadata: app.kubernetes.io/name: vector app.kubernetes.io/instance: vector app.kubernetes.io/component: Aggregator - app.kubernetes.io/version: "0.53.0-distroless-libc" + app.kubernetes.io/version: "0.54.0-distroless-libc" data: aggregator.yaml: | data_dir: /vector-data-dir diff --git a/distribution/kubernetes/vector-aggregator/service-headless.yaml b/distribution/kubernetes/vector-aggregator/service-headless.yaml index ecc8494e0256a..3170f1c3751cd 100644 --- a/distribution/kubernetes/vector-aggregator/service-headless.yaml +++ 
b/distribution/kubernetes/vector-aggregator/service-headless.yaml @@ -9,7 +9,7 @@ metadata: app.kubernetes.io/name: vector app.kubernetes.io/instance: vector app.kubernetes.io/component: Aggregator - app.kubernetes.io/version: "0.53.0-distroless-libc" + app.kubernetes.io/version: "0.54.0-distroless-libc" annotations: spec: clusterIP: None diff --git a/distribution/kubernetes/vector-aggregator/service.yaml b/distribution/kubernetes/vector-aggregator/service.yaml index 6a1438db5d1b8..3c423d4590371 100644 --- a/distribution/kubernetes/vector-aggregator/service.yaml +++ b/distribution/kubernetes/vector-aggregator/service.yaml @@ -9,7 +9,7 @@ metadata: app.kubernetes.io/name: vector app.kubernetes.io/instance: vector app.kubernetes.io/component: Aggregator - app.kubernetes.io/version: "0.53.0-distroless-libc" + app.kubernetes.io/version: "0.54.0-distroless-libc" annotations: spec: ports: diff --git a/distribution/kubernetes/vector-aggregator/serviceaccount.yaml b/distribution/kubernetes/vector-aggregator/serviceaccount.yaml index 4ddd4c0774715..4f97844c7fee1 100644 --- a/distribution/kubernetes/vector-aggregator/serviceaccount.yaml +++ b/distribution/kubernetes/vector-aggregator/serviceaccount.yaml @@ -9,5 +9,5 @@ metadata: app.kubernetes.io/name: vector app.kubernetes.io/instance: vector app.kubernetes.io/component: Aggregator - app.kubernetes.io/version: "0.53.0-distroless-libc" + app.kubernetes.io/version: "0.54.0-distroless-libc" automountServiceAccountToken: true diff --git a/distribution/kubernetes/vector-aggregator/statefulset.yaml b/distribution/kubernetes/vector-aggregator/statefulset.yaml index f163a10a8d41c..d107f7d55cd91 100644 --- a/distribution/kubernetes/vector-aggregator/statefulset.yaml +++ b/distribution/kubernetes/vector-aggregator/statefulset.yaml @@ -9,7 +9,7 @@ metadata: app.kubernetes.io/name: vector app.kubernetes.io/instance: vector app.kubernetes.io/component: Aggregator - app.kubernetes.io/version: "0.53.0-distroless-libc" + 
app.kubernetes.io/version: "0.54.0-distroless-libc" annotations: {} spec: replicas: 1 @@ -34,7 +34,7 @@ spec: dnsPolicy: ClusterFirst containers: - name: vector - image: "timberio/vector:0.53.0-distroless-libc" + image: "docker.io/timberio/vector:0.54.0-distroless-libc" imagePullPolicy: IfNotPresent args: - --config-dir diff --git a/distribution/kubernetes/vector-stateless-aggregator/README.md b/distribution/kubernetes/vector-stateless-aggregator/README.md index 3eaea97af7e20..d03fb7eb5250b 100644 --- a/distribution/kubernetes/vector-stateless-aggregator/README.md +++ b/distribution/kubernetes/vector-stateless-aggregator/README.md @@ -1,6 +1,6 @@ The kubernetes manifests found in this directory have been automatically generated from the [helm chart `vector/vector`](https://github.com/vectordotdev/helm-charts/tree/master/charts/vector) -version 0.50.0 with the following `values.yaml`: +version 0.51.0 with the following `values.yaml`: ```yaml role: Stateless-Aggregator diff --git a/distribution/kubernetes/vector-stateless-aggregator/configmap.yaml b/distribution/kubernetes/vector-stateless-aggregator/configmap.yaml index bce12915342a8..e13b42240886c 100644 --- a/distribution/kubernetes/vector-stateless-aggregator/configmap.yaml +++ b/distribution/kubernetes/vector-stateless-aggregator/configmap.yaml @@ -9,7 +9,7 @@ metadata: app.kubernetes.io/name: vector app.kubernetes.io/instance: vector app.kubernetes.io/component: Stateless-Aggregator - app.kubernetes.io/version: "0.53.0-distroless-libc" + app.kubernetes.io/version: "0.54.0-distroless-libc" data: aggregator.yaml: | data_dir: /vector-data-dir diff --git a/distribution/kubernetes/vector-stateless-aggregator/deployment.yaml b/distribution/kubernetes/vector-stateless-aggregator/deployment.yaml index 7385a4b8e2151..60e585952669b 100644 --- a/distribution/kubernetes/vector-stateless-aggregator/deployment.yaml +++ b/distribution/kubernetes/vector-stateless-aggregator/deployment.yaml @@ -9,7 +9,7 @@ metadata: 
app.kubernetes.io/name: vector app.kubernetes.io/instance: vector app.kubernetes.io/component: Stateless-Aggregator - app.kubernetes.io/version: "0.53.0-distroless-libc" + app.kubernetes.io/version: "0.54.0-distroless-libc" annotations: {} spec: replicas: 1 @@ -32,7 +32,7 @@ spec: dnsPolicy: ClusterFirst containers: - name: vector - image: "timberio/vector:0.53.0-distroless-libc" + image: "docker.io/timberio/vector:0.54.0-distroless-libc" imagePullPolicy: IfNotPresent args: - --config-dir diff --git a/distribution/kubernetes/vector-stateless-aggregator/service-headless.yaml b/distribution/kubernetes/vector-stateless-aggregator/service-headless.yaml index ecea5e6e62213..bb2c35189c395 100644 --- a/distribution/kubernetes/vector-stateless-aggregator/service-headless.yaml +++ b/distribution/kubernetes/vector-stateless-aggregator/service-headless.yaml @@ -9,7 +9,7 @@ metadata: app.kubernetes.io/name: vector app.kubernetes.io/instance: vector app.kubernetes.io/component: Stateless-Aggregator - app.kubernetes.io/version: "0.53.0-distroless-libc" + app.kubernetes.io/version: "0.54.0-distroless-libc" annotations: spec: clusterIP: None diff --git a/distribution/kubernetes/vector-stateless-aggregator/service.yaml b/distribution/kubernetes/vector-stateless-aggregator/service.yaml index bc394012f1681..b96e58ef1ea31 100644 --- a/distribution/kubernetes/vector-stateless-aggregator/service.yaml +++ b/distribution/kubernetes/vector-stateless-aggregator/service.yaml @@ -9,7 +9,7 @@ metadata: app.kubernetes.io/name: vector app.kubernetes.io/instance: vector app.kubernetes.io/component: Stateless-Aggregator - app.kubernetes.io/version: "0.53.0-distroless-libc" + app.kubernetes.io/version: "0.54.0-distroless-libc" annotations: spec: ports: diff --git a/distribution/kubernetes/vector-stateless-aggregator/serviceaccount.yaml b/distribution/kubernetes/vector-stateless-aggregator/serviceaccount.yaml index 7205cb183ed13..2636f02bed059 100644 --- 
a/distribution/kubernetes/vector-stateless-aggregator/serviceaccount.yaml +++ b/distribution/kubernetes/vector-stateless-aggregator/serviceaccount.yaml @@ -9,5 +9,5 @@ metadata: app.kubernetes.io/name: vector app.kubernetes.io/instance: vector app.kubernetes.io/component: Stateless-Aggregator - app.kubernetes.io/version: "0.53.0-distroless-libc" + app.kubernetes.io/version: "0.54.0-distroless-libc" automountServiceAccountToken: true diff --git a/docs/DEVELOPING.md b/docs/DEVELOPING.md index f32965244c28f..d286c9feca343 100644 --- a/docs/DEVELOPING.md +++ b/docs/DEVELOPING.md @@ -127,7 +127,7 @@ Loosely, you'll need the following: - **To run integration tests:** Have `docker` available, or a real live version of that service. (Use `AUTOSPAWN=false`) - **To run `make check-component-features`:** Have `remarshal` installed. - **To run `make check-licenses` or `make build-licenses`:** Have `dd-rust-license-tool` [installed](https://github.com/DataDog/rust-license-tool). -- **To run `make generate-component-docs`:** Have `cue` [installed](https://cuelang.org/docs/install/). +- **To run `make generate-docs`:** Have `cue` [installed](https://cuelang.org/docs/install/). If you find yourself needing to run something inside the Docker environment described above, that's totally fine, they won't collide or hurt each other. In this case, you'd just run `make environment-generate`. @@ -161,8 +161,8 @@ cargo bench transforms::example # Format your code before pushing! make fmt cargo fmt -# Build component documentation for the website -make generate-component-docs +# Build component and VRL documentation for the website +make generate-docs ``` If you run `make` you'll see a full list of all our tasks. Some of these will start Docker containers, sign commits, or even make releases. These are not common development commands and your mileage may vary. @@ -435,14 +435,7 @@ tests related only to this component, the following approach can reduce waiting times: 1. 
Install [cargo-watch](https://github.com/passcod/cargo-watch). -2. (Only for GNU/Linux) Install LLVM 9 (for example, package `llvm-9` on Debian) - and set `RUSTFLAGS` environment variable to use `lld` as the linker: - - ```sh - export RUSTFLAGS='-Clinker=clang-9 -Clink-arg=-fuse-ld=lld' - ``` - -3. Run in the root directory of Vector's source +2. Run in the root directory of Vector's source ```sh cargo watch -s clear -s \ diff --git a/docs/DOCUMENTING.md b/docs/DOCUMENTING.md index a0529de37a8a9..e6a032fee8dcf 100644 --- a/docs/DOCUMENTING.md +++ b/docs/DOCUMENTING.md @@ -11,12 +11,13 @@ documentation in tandem with code changes. 2. [Reference documentation](#reference-documentation) 1. [Installing CUE](#installing-cue) 2. [Generating from source code](#generating-from-source-code) - 3. [Formatting](#formatting) - 4. [Validating](#validating) + 3. [Adding Documentation for New Components](#adding-documentation-for-new-components) + 4. [Formatting](#formatting) + 5. [Validating](#validating) 1. [Tips & tricks](#tips--tricks) 1. [Make small incremental changes](#make-small-incremental-changes) - 5. [Changelog](#changelog) - 6. [Release highlights](#release-highlights) + 6. [Changelog](#changelog) + 7. [Release highlights](#release-highlights) 1. [FAQ](#faq) 1. [What makes a release highlight noteworthy?](#what-makes-a-release-highlight-noteworthy) 2. [How is a release highlight different from a blog post?](#how-is-a-release-highlight-different-from-a-blog-post) @@ -61,9 +62,45 @@ Much of Vector's reference documentation is automatically compiled from source c To regenerate this content, run: ```bash -make generate-component-docs +make generate-docs ``` +### Adding Documentation for New Components + +When introducing a new source, sink, or transform, you need to create documentation in two steps: + +1. 
**Generate the base documentation** from your Rust configuration schema: + + ```bash + make generate-component-docs + ``` + + This creates an auto-generated CUE file in `website/cue/reference/components/{sources,sinks,transforms}/generated/.cue` containing all the configuration options from your Rust code. + Note that documentation for config parameters behavior should be part of the Rust docs. The above command will generate a CUE file populated with the Rust docs. + +2. **Create a manual CUE file** with additional metadata that cannot be auto-generated: + - Create a new file at `website/cue/reference/components/{sources,sinks,transforms}/.cue` + - This file should include metadata like title, description, examples, feature classifications, and how-it-works sections. See existing CUE files for guidance. + - Look at existing components for examples, such as `website/cue/reference/components/transforms/remap.cue` + +3. **Format the CUE files**: + + ```bash + ./scripts/cue.sh fmt + ``` + +4. **Create a markdown documentation file** for the website: + - Create a new file at `website/content/en/docs/reference/configuration/{sources,sinks,transforms}/.md` + - Look at existing examples e.g. `website/content/en/docs/reference/configuration/transforms/remap.md` + +5. **Verify your documentation** is correct: + + ```bash + make check-generated-docs + ``` + +6. It is recommended to `cd website && make serve` to view how the documentation renders on the Vector website. + ### Formatting Vector has some CUE-related CI checks that are run whenever changes are made to diff --git a/docs/RUST_STYLE.md b/docs/RUST_STYLE.md new file mode 100644 index 0000000000000..5ee34496af7b8 --- /dev/null +++ b/docs/RUST_STYLE.md @@ -0,0 +1,100 @@ +# Rust Style Guide for Vector + +> **Note: +** This is a draft document primarily intended for AI agents (like Claude) to understand Vector's Rust coding conventions. These guidelines help ensure consistent code generation and modifications. 
+ +This document outlines Rust coding conventions and patterns for Vector development. + +## Import Statements (`use`) + +All `use` statements must be at the **top of the file/module** or at the top of `mod tests`. +This is for consistency. + +**Correct:** + +```rust +use std::time::Duration; +use governor::clock; +use crate::config::TransformConfig; + +fn my_function() { + // function code +} +``` + +**Incorrect:** + +```rust +fn my_function() { + use std::time::Duration; // WRONG; Do not insert `use` inside functions + // function code +} +``` + +**Organization:** + +- Group imports: `std` → external crates → internal (`crate::`) +- Use `rustfmt` to automatically organize them: `make fmt` + +## Logging Style + +Always use the [Tracing crate](https://tracing.rs/tracing/)'s key/value style: + +**Correct:** + +```rust +warn!(message = "Failed to merge value.", %error); +info!(message = "Processing batch.", batch_size, internal_log_rate_secs = 1); +``` + +**Incorrect:** + +```rust +warn!("Failed to merge value: {}.", err); // Don't do this +``` + +**Rules:** + +- Events should be capitalized and end with a period +- Use `error` (not `e` or `err`) for error values +- Prefer Display over Debug: `%error` not `?error` +- Key/value pairs provide structured logging + +## String Formatting + +Prefer inline variable syntax in format strings (Rust 1.58+). + +**Correct:** + +```rust +format!("Error: {err}"); +println!("Processing {count} items"); +``` + +**Incorrect:** + +```rust +format!("Error: {}", err); // Unnecessary positional argument +println!("Processing {} items", count); +``` + +**Why:** Inline syntax is more readable and reduces mistakes with argument ordering. + +## Panics + +Code in Vector should **NOT** panic under normal circumstances. 
+ +- Panics are only acceptable when assumptions about internal state are violated (indicating a bug) +- All potential panics **MUST** be documented in function documentation +- Prefer `Result` and proper error handling + +## Feature Flags + +New components (sources, sinks, transforms) must be behind feature flags: + +```bash +# Build only specific component for faster iteration +cargo test --lib --no-default-features --features sinks-console sinks::console +``` + +See `features` section in `Cargo.toml` for examples. diff --git a/docs/generated/README.md b/docs/generated/README.md new file mode 100644 index 0000000000000..681ba8e239005 --- /dev/null +++ b/docs/generated/README.md @@ -0,0 +1,11 @@ +# Generated Vector VRL Function Documentation + +These JSON files are auto-generated from the Vector VRL function definitions. + +Do **not** alter these files directly as they will be overwritten by CI. + +To regenerate: + +```sh +make generate-vector-vrl-docs +``` diff --git a/docs/generated/aggregate_vector_metrics.json b/docs/generated/aggregate_vector_metrics.json new file mode 100644 index 0000000000000..3bce36deb1ff4 --- /dev/null +++ b/docs/generated/aggregate_vector_metrics.json @@ -0,0 +1,73 @@ +{ + "anchor": "aggregate_vector_metrics", + "name": "aggregate_vector_metrics", + "category": "Metrics", + "description": "Aggregates internal Vector metrics, using one of 4 aggregation functions, filtering by name and optionally by tags. Returns the aggregated value. Only includes counter and gauge metrics.\n\nInternal Vector metrics functions work with a snapshot of the metrics. The interval at which the snapshot is updated is controlled through the [`metrics_storage_refresh_period`](/docs/reference/configuration/global-options/#metrics_storage_refresh_period) global option. 
Higher values can reduce performance impact of that process, but may cause stale metrics data in the snapshot.", + "arguments": [ + { + "name": "function", + "description": "The aggregation function to apply to the matched metrics.", + "required": true, + "type": [ + "string" + ], + "enum": { + "sum": "Sum the values of all the matched metrics.", + "avg": "Find the average of the values of all the matched metrics.", + "max": "Find the highest metric value of all the matched metrics.", + "min": "Find the lowest metric value of all the matched metrics." + } + }, + { + "name": "key", + "description": "The metric name to aggregate.", + "required": true, + "type": [ + "string" + ] + }, + { + "name": "tags", + "description": "Tags to filter the results on. Values in this object support wildcards ('*') to match on parts of the tag value.", + "required": false, + "type": [ + "object" + ], + "default": "{ }" + } + ], + "return": { + "types": [ + "float", + "null" + ] + }, + "examples": [ + { + "title": "Sum vector internal metrics matching the name", + "source": "aggregate_vector_metrics(\"sum\", \"utilization\")", + "return": 0.5 + }, + { + "title": "Sum vector internal metrics matching the name and tags", + "source": "aggregate_vector_metrics(\"sum\", \"utilization\", tags: {\"component_id\": \"test\"})", + "return": 0.5 + }, + { + "title": "Average of vector internal metrics matching the name", + "source": "aggregate_vector_metrics(\"avg\", \"utilization\")", + "return": 0.5 + }, + { + "title": "Max of vector internal metrics matching the name", + "source": "aggregate_vector_metrics(\"max\", \"utilization\")", + "return": 0.5 + }, + { + "title": "Min of vector internal metrics matching the name", + "source": "aggregate_vector_metrics(\"min\", \"utilization\")", + "return": 0.5 + } + ], + "pure": true +} diff --git a/docs/generated/find_enrichment_table_records.json b/docs/generated/find_enrichment_table_records.json new file mode 100644 index 0000000000000..105c16d7ca3bd 
--- /dev/null +++ b/docs/generated/find_enrichment_table_records.json @@ -0,0 +1,121 @@ +{ + "anchor": "find_enrichment_table_records", + "name": "find_enrichment_table_records", + "category": "Enrichment", + "description": "Searches an [enrichment table](/docs/reference/glossary/#enrichment-tables) for rows that match the provided condition.\n\nFor `file` enrichment tables, this condition needs to be a VRL object in which\nthe key-value pairs indicate a field to search mapped to a value to search in that field.\nThis function returns the rows that match the provided condition(s). _All_ fields need to\nmatch for rows to be returned; if any fields do not match, then no rows are returned.\n\nThere are three forms of search criteria:\n\n1. **Exact match search**. The given field must match the value exactly. Case sensitivity\n can be specified using the `case_sensitive` argument. An exact match search can use an\n index directly into the dataset, which should make this search fairly \"cheap\" from a\n performance perspective.\n\n2. **Wildcard match search**. The given fields specified by the exact match search may also\n be matched exactly to the value provided to the `wildcard` parameter.\n A wildcard match search can also use an index directly into the dataset.\n\n3. **Date range search**. The given field must be greater than or equal to the `from` date\n and/or less than or equal to the `to` date. A date range search involves\n sequentially scanning through the rows that have been located using any exact match\n criteria. This can be an expensive operation if there are many rows returned by any exact\n match criteria. Therefore, use date ranges as the _only_ criteria when the enrichment\n data set is very small.\n\nFor `geoip` and `mmdb` enrichment tables, this condition needs to be a VRL object with a single key-value pair\nwhose value needs to be a valid IP address. Example: `{\"ip\": .ip }`. If a return field is expected\nand without a value, `null` is used. 
This table can return the following fields:\n\n* ISP databases:\n * `autonomous_system_number`\n * `autonomous_system_organization`\n * `isp`\n * `organization`\n\n* City databases:\n * `city_name`\n * `continent_code`\n * `country_code`\n * `country_name`\n * `region_code`\n * `region_name`\n * `metro_code`\n * `latitude`\n * `longitude`\n * `postal_code`\n * `timezone`\n\n* Connection-Type databases:\n * `connection_type`\n\nTo use this function, you need to update your configuration to\ninclude an\n[`enrichment_tables`](/docs/reference/configuration/global-options/#enrichment_tables)\nparameter.", + "arguments": [ + { + "name": "table", + "description": "The [enrichment table](/docs/reference/glossary/#enrichment-tables) to search.", + "required": true, + "type": [ + "string" + ] + }, + { + "name": "condition", + "description": "The condition to search on. Since the condition is used at boot time to create indices into the data, these conditions must be statically defined.", + "required": true, + "type": [ + "object" + ] + }, + { + "name": "select", + "description": "A subset of fields from the enrichment table to return. 
If not specified, all fields are returned.", + "required": false, + "type": [ + "array" + ] + }, + { + "name": "case_sensitive", + "description": "Whether text fields need to match cases exactly.", + "required": false, + "type": [ + "boolean" + ], + "default": "true" + }, + { + "name": "wildcard", + "description": "Value to use for wildcard matching in the search.", + "required": false, + "type": [ + "string" + ] + } + ], + "return": { + "types": [ + "array" + ] + }, + "examples": [ + { + "title": "Exact match", + "source": "find_enrichment_table_records!(\n \"test\",\n {\"surname\": \"Smith\"}\n)\n", + "return": [ + { + "id": 1, + "firstname": "Bob", + "surname": "Smith" + }, + { + "id": 2, + "firstname": "Fred", + "surname": "Smith" + } + ] + }, + { + "title": "Case insensitive match", + "source": "find_enrichment_table_records!(\n \"test\",\n {\"surname\": \"smith\"},\n case_sensitive: false\n)\n", + "return": [ + { + "id": 1, + "firstname": "Bob", + "surname": "Smith" + }, + { + "id": 2, + "firstname": "Fred", + "surname": "Smith" + } + ] + }, + { + "title": "Wildcard match", + "source": "find_enrichment_table_records!(\n \"test\",\n {\"firstname\": \"Bob\"},\n wildcard: \"fred\",\n case_sensitive: false\n)\n", + "return": [ + { + "id": 1, + "firstname": "Bob", + "surname": "Smith" + }, + { + "id": 2, + "firstname": "Fred", + "surname": "Smith" + } + ] + }, + { + "title": "Date range search", + "source": "find_enrichment_table_records!(\n \"test\",\n {\n \"surname\": \"Smith\",\n \"date_of_birth\": {\n \"from\": t'1985-01-01T00:00:00Z',\n \"to\": t'1985-12-31T00:00:00Z'\n }\n }\n)\n", + "return": [ + { + "id": 1, + "firstname": "Bob", + "surname": "Smith" + }, + { + "id": 2, + "firstname": "Fred", + "surname": "Smith" + } + ] + } + ], + "pure": true +} diff --git a/docs/generated/find_vector_metrics.json b/docs/generated/find_vector_metrics.json new file mode 100644 index 0000000000000..30b7255c37268 --- /dev/null +++ b/docs/generated/find_vector_metrics.json 
@@ -0,0 +1,67 @@ +{ + "anchor": "find_vector_metrics", + "name": "find_vector_metrics", + "category": "Metrics", + "description": "Searches internal Vector metrics by name and optionally by tags. Returns all matching metrics.\n\nInternal Vector metrics functions work with a snapshot of the metrics. The interval at which the snapshot is updated is controlled through the [`metrics_storage_refresh_period`](/docs/reference/configuration/global-options/#metrics_storage_refresh_period) global option. Higher values can reduce performance impact of that process, but may cause stale metrics data in the snapshot.", + "arguments": [ + { + "name": "key", + "description": "The metric name to search.", + "required": true, + "type": [ + "string" + ] + }, + { + "name": "tags", + "description": "Tags to filter the results on. Values in this object support wildcards ('*') to match on parts of the tag value.", + "required": false, + "type": [ + "object" + ], + "default": "{ }" + } + ], + "return": { + "types": [ + "array" + ] + }, + "examples": [ + { + "title": "Find vector internal metrics matching the name", + "source": "find_vector_metrics(\"utilization\")", + "return": [ + { + "name": "utilization", + "tags": { + "component_id": [ + "test" + ] + }, + "type": "gauge", + "kind": "absolute", + "value": 0.5 + } + ] + }, + { + "title": "Find vector internal metrics matching the name and tags", + "source": "find_vector_metrics(\"utilization\", tags: {\"component_id\": \"test\"})", + "return": [ + { + "name": "utilization", + "tags": { + "component_id": [ + "test" + ] + }, + "type": "gauge", + "kind": "absolute", + "value": 0.5 + } + ] + } + ], + "pure": true +} diff --git a/docs/generated/get_enrichment_table_record.json b/docs/generated/get_enrichment_table_record.json new file mode 100644 index 0000000000000..947e8b3e65374 --- /dev/null +++ b/docs/generated/get_enrichment_table_record.json @@ -0,0 +1,88 @@ +{ + "anchor": "get_enrichment_table_record", + "name": 
"get_enrichment_table_record", + "category": "Enrichment", + "description": "Searches an [enrichment table](/docs/reference/glossary/#enrichment-tables) for a row that matches the provided condition. A single row must be matched. If no rows are found or more than one row is found, an error is returned.\n\nFor `file` enrichment tables, this condition needs to be a VRL object in which\nthe key-value pairs indicate a field to search mapped to a value to search in that field.\nThis function returns the rows that match the provided condition(s). _All_ fields need to\nmatch for rows to be returned; if any fields do not match, then no rows are returned.\n\nThere are three forms of search criteria:\n\n1. **Exact match search**. The given field must match the value exactly. Case sensitivity\n can be specified using the `case_sensitive` argument. An exact match search can use an\n index directly into the dataset, which should make this search fairly \"cheap\" from a\n performance perspective.\n\n2. **Wildcard match search**. The given fields specified by the exact match search may also\n be matched exactly to the value provided to the `wildcard` parameter.\n A wildcard match search can also use an index directly into the dataset.\n\n3. **Date range search**. The given field must be greater than or equal to the `from` date\n and/or less than or equal to the `to` date. A date range search involves\n sequentially scanning through the rows that have been located using any exact match\n criteria. This can be an expensive operation if there are many rows returned by any exact\n match criteria. Therefore, use date ranges as the _only_ criteria when the enrichment\n data set is very small.\n\nFor `geoip` and `mmdb` enrichment tables, this condition needs to be a VRL object with a single key-value pair\nwhose value needs to be a valid IP address. Example: `{\"ip\": .ip }`. If a return field is expected\nand without a value, `null` is used. 
This table can return the following fields:\n\n* ISP databases:\n * `autonomous_system_number`\n * `autonomous_system_organization`\n * `isp`\n * `organization`\n\n* City databases:\n * `city_name`\n * `continent_code`\n * `country_code`\n * `country_name`\n * `region_code`\n * `region_name`\n * `metro_code`\n * `latitude`\n * `longitude`\n * `postal_code`\n * `timezone`\n\n* Connection-Type databases:\n * `connection_type`\n\nTo use this function, you need to update your configuration to\ninclude an\n[`enrichment_tables`](/docs/reference/configuration/global-options/#enrichment_tables)\nparameter.", + "arguments": [ + { + "name": "table", + "description": "The [enrichment table](/docs/reference/glossary/#enrichment-tables) to search.", + "required": true, + "type": [ + "string" + ] + }, + { + "name": "condition", + "description": "The condition to search on. Since the condition is used at boot time to create indices into the data, these conditions must be statically defined.", + "required": true, + "type": [ + "object" + ] + }, + { + "name": "select", + "description": "A subset of fields from the enrichment table to return. If not specified, all fields are returned.", + "required": false, + "type": [ + "array" + ] + }, + { + "name": "case_sensitive", + "description": "Whether the text fields match the case exactly.", + "required": false, + "type": [ + "boolean" + ], + "default": "true" + }, + { + "name": "wildcard", + "description": "Value to use for wildcard matching in the search.", + "required": false, + "type": [ + "string" + ] + } + ], + "return": { + "types": [ + "object" + ] + }, + "internal_failure_reasons": [ + "The row is not found.", + "Multiple rows are found that match the condition." 
+ ], + "examples": [ + { + "title": "Exact match", + "source": "get_enrichment_table_record!(\"test\", {\"id\": 1})", + "return": { + "id": 1, + "firstname": "Bob", + "surname": "Smith" + } + }, + { + "title": "Case insensitive match", + "source": "get_enrichment_table_record!(\n \"test\",\n {\"surname\": \"bob\", \"firstname\": \"John\"},\n case_sensitive: false\n)\n", + "return": { + "id": 1, + "firstname": "Bob", + "surname": "Smith" + } + }, + { + "title": "Date range search", + "source": "get_enrichment_table_record!(\n \"test\",\n {\n \"surname\": \"Smith\",\n \"date_of_birth\": {\n \"from\": t'1985-01-01T00:00:00Z',\n \"to\": t'1985-12-31T00:00:00Z'\n }\n }\n)\n", + "return": { + "id": 1, + "firstname": "Bob", + "surname": "Smith" + } + } + ], + "pure": true +} diff --git a/docs/generated/get_secret.json b/docs/generated/get_secret.json new file mode 100644 index 0000000000000..3010d09f81acc --- /dev/null +++ b/docs/generated/get_secret.json @@ -0,0 +1,35 @@ +{ + "anchor": "get_secret", + "name": "get_secret", + "category": "Event", + "description": "Returns the value of the given secret from an event.", + "arguments": [ + { + "name": "key", + "description": "The name of the secret.", + "required": true, + "type": [ + "string" + ] + } + ], + "return": { + "types": [ + "string", + "null" + ] + }, + "examples": [ + { + "title": "Get the Datadog API key from the event metadata", + "source": "get_secret(\"datadog_api_key\")", + "return": "secret value" + }, + { + "title": "Get a non existent secret", + "source": "get_secret(\"i_dont_exist\")", + "return": null + } + ], + "pure": true +} diff --git a/docs/generated/get_vector_metric.json b/docs/generated/get_vector_metric.json new file mode 100644 index 0000000000000..e6e40dd8c0e7d --- /dev/null +++ b/docs/generated/get_vector_metric.json @@ -0,0 +1,64 @@ +{ + "anchor": "get_vector_metric", + "name": "get_vector_metric", + "category": "Metrics", + "description": "Searches internal Vector metrics by name and 
optionally by tags. Returns the first matching metric.\n\nInternal Vector metrics functions work with a snapshot of the metrics. The interval at which the snapshot is updated is controlled through the [`metrics_storage_refresh_period`](/docs/reference/configuration/global-options/#metrics_storage_refresh_period) global option. Higher values can reduce performance impact of that process, but may cause stale metrics data in the snapshot.", + "arguments": [ + { + "name": "key", + "description": "The metric name to search.", + "required": true, + "type": [ + "string" + ] + }, + { + "name": "tags", + "description": "Tags to filter the results on. Values in this object support wildcards ('*') to match on parts of the tag value.", + "required": false, + "type": [ + "object" + ], + "default": "{ }" + } + ], + "return": { + "types": [ + "object", + "null" + ] + }, + "examples": [ + { + "title": "Get a vector internal metric matching the name", + "source": "get_vector_metric(\"utilization\")", + "return": { + "name": "utilization", + "tags": { + "component_id": [ + "test" + ] + }, + "type": "gauge", + "kind": "absolute", + "value": 0.5 + } + }, + { + "title": "Get a vector internal metric matching the name and tags", + "source": "get_vector_metric(\"utilization\", tags: {\"component_id\": \"test\"})", + "return": { + "name": "utilization", + "tags": { + "component_id": [ + "test" + ] + }, + "type": "gauge", + "kind": "absolute", + "value": 0.5 + } + } + ], + "pure": true +} diff --git a/docs/generated/parse_dnstap.json b/docs/generated/parse_dnstap.json new file mode 100644 index 0000000000000..6bc7356fe5201 --- /dev/null +++ b/docs/generated/parse_dnstap.json @@ -0,0 +1,142 @@ +{ + "anchor": "parse_dnstap", + "name": "parse_dnstap", + "category": "Parse", + "description": "Parses the `value` as base64 encoded DNSTAP data.", + "arguments": [ + { + "name": "value", + "description": "The base64 encoded representation of the DNSTAP data to parse.", + "required": true, + "type": 
[ + "string" + ] + }, + { + "name": "lowercase_hostnames", + "description": "Whether to turn all hostnames found in resulting data lowercase, for consistency.", + "required": false, + "type": [ + "boolean" + ], + "default": "false" + } + ], + "return": { + "types": [ + "object" + ] + }, + "internal_failure_reasons": [ + "`value` is not a valid base64 encoded string.", + "dnstap parsing failed for `value`" + ], + "examples": [ + { + "title": "Parse dnstap query message", + "source": "parse_dnstap!(\"ChVqYW1lcy1WaXJ0dWFsLU1hY2hpbmUSC0JJTkQgOS4xNi4zGgBy5wEIAxACGAEiEAAAAAAAAAAAAAAAAAAAAAAqECABBQJwlAAAAAAAAAAAADAw8+0CODVA7+zq9wVNMU3WNlI2kwIAAAABAAAAAAABCWZhY2Vib29rMQNjb20AAAEAAQAAKQIAAACAAAAMAAoACOxjCAG9zVgzWgUDY29tAGAAbQAAAAByZLM4AAAAAQAAAAAAAQJoNQdleGFtcGxlA2NvbQAABgABAAApBNABAUAAADkADwA1AAlubyBTRVAgbWF0Y2hpbmcgdGhlIERTIGZvdW5kIGZvciBkbnNzZWMtZmFpbGVkLm9yZy54AQ==\")", + "return": { + "dataType": "Message", + "dataTypeId": 1, + "extraInfo": "", + "messageType": "ResolverQuery", + "messageTypeId": 3, + "queryZone": "com.", + "requestData": { + "fullRcode": 0, + "header": { + "aa": false, + "ad": false, + "anCount": 0, + "arCount": 1, + "cd": false, + "id": 37634, + "nsCount": 0, + "opcode": 0, + "qdCount": 1, + "qr": 0, + "ra": false, + "rcode": 0, + "rd": false, + "tc": false + }, + "opt": { + "do": true, + "ednsVersion": 0, + "extendedRcode": 0, + "options": [ + { + "optCode": 10, + "optName": "Cookie", + "optValue": "7GMIAb3NWDM=" + } + ], + "udpPayloadSize": 512 + }, + "question": [ + { + "class": "IN", + "domainName": "facebook1.com.", + "questionType": "A", + "questionTypeId": 1 + } + ], + "rcodeName": "NoError" + }, + "responseData": { + "fullRcode": 16, + "header": { + "aa": false, + "ad": false, + "anCount": 0, + "arCount": 1, + "cd": false, + "id": 45880, + "nsCount": 0, + "opcode": 0, + "qdCount": 1, + "qr": 0, + "ra": false, + "rcode": 16, + "rd": false, + "tc": false + }, + "opt": { + "do": false, + "ednsVersion": 1, + "extendedRcode": 1, + "ede": [ + { + 
"extraText": "no SEP matching the DS found for dnssec-failed.org.", + "infoCode": 9, + "purpose": "DNSKEY Missing" + } + ], + "udpPayloadSize": 1232 + }, + "question": [ + { + "class": "IN", + "domainName": "h5.example.com.", + "questionType": "SOA", + "questionTypeId": 6 + } + ], + "rcodeName": "BADVERS" + }, + "responseAddress": "2001:502:7094::30", + "responsePort": 53, + "serverId": "james-Virtual-Machine", + "serverVersion": "BIND 9.16.3", + "socketFamily": "INET6", + "socketProtocol": "UDP", + "sourceAddress": "::", + "sourcePort": 46835, + "time": 1593489007920014129, + "timePrecision": "ns", + "timestamp": "2020-06-30T03:50:07.920014129Z" + } + } + ], + "pure": true +} diff --git a/docs/generated/remove_secret.json b/docs/generated/remove_secret.json new file mode 100644 index 0000000000000..a351309087fbb --- /dev/null +++ b/docs/generated/remove_secret.json @@ -0,0 +1,29 @@ +{ + "anchor": "remove_secret", + "name": "remove_secret", + "category": "Event", + "description": "Removes a secret from an event.", + "arguments": [ + { + "name": "key", + "description": "The name of the secret to remove.", + "required": true, + "type": [ + "string" + ] + } + ], + "return": { + "types": [ + "null" + ] + }, + "examples": [ + { + "title": "Remove the datadog api key", + "source": "remove_secret(\"datadog_api_key\")", + "return": null + } + ], + "pure": true +} diff --git a/docs/generated/set_secret.json b/docs/generated/set_secret.json new file mode 100644 index 0000000000000..6d889e83be1a2 --- /dev/null +++ b/docs/generated/set_secret.json @@ -0,0 +1,37 @@ +{ + "anchor": "set_secret", + "name": "set_secret", + "category": "Event", + "description": "Sets the given secret in the event.", + "arguments": [ + { + "name": "key", + "description": "The name of the secret.", + "required": true, + "type": [ + "string" + ] + }, + { + "name": "secret", + "description": "The secret value.", + "required": true, + "type": [ + "string" + ] + } + ], + "return": { + "types": [ + "null" 
+ ] + }, + "examples": [ + { + "title": "Set the datadog api key", + "source": "set_secret(\"datadog_api_key\", \"secret-value\")", + "return": null + } + ], + "pure": true +} diff --git a/docs/generated/set_semantic_meaning.json b/docs/generated/set_semantic_meaning.json new file mode 100644 index 0000000000000..113bd3509c670 --- /dev/null +++ b/docs/generated/set_semantic_meaning.json @@ -0,0 +1,40 @@ +{ + "anchor": "set_semantic_meaning", + "name": "set_semantic_meaning", + "category": "Event", + "description": "Sets a semantic meaning for an event.", + "arguments": [ + { + "name": "target", + "description": "The path of the value that is assigned a meaning.", + "required": true, + "type": [ + "any" + ] + }, + { + "name": "meaning", + "description": "The name of the meaning to assign.", + "required": true, + "type": [ + "string" + ] + } + ], + "return": { + "types": [ + "null" + ] + }, + "examples": [ + { + "title": "Sets custom field semantic meaning", + "source": "set_semantic_meaning(.foo, \"bar\")", + "return": null + } + ], + "notices": [ + "This function assigns meaning at startup, and has _no_ runtime behavior. It is suggested\nto put all calls to this function at the beginning of a VRL function. The function\ncannot be conditionally called. For example, using an if statement cannot stop the\nmeaning from being assigned." 
+ ], + "pure": true +} diff --git a/lib/codecs/Cargo.toml b/lib/codecs/Cargo.toml index abaa66e898840..0ed26d0b16a41 100644 --- a/lib/codecs/Cargo.toml +++ b/lib/codecs/Cargo.toml @@ -21,7 +21,7 @@ chrono.workspace = true rust_decimal.workspace = true csv-core = { version = "0.1.13", default-features = false } derivative.workspace = true -derive_more = { version = "2.0.1", optional = true, features = ["from", "display"] } +derive_more = { version = "2.1.1", optional = true, features = ["from", "display"] } dyn-clone = { version = "1", default-features = false } flate2.workspace = true futures.workspace = true diff --git a/lib/codecs/src/encoding/format/syslog.rs b/lib/codecs/src/encoding/format/syslog.rs index 601d4d3c04617..188ecb0d4cb52 100644 --- a/lib/codecs/src/encoding/format/syslog.rs +++ b/lib/codecs/src/encoding/format/syslog.rs @@ -1,11 +1,14 @@ use bytes::{BufMut, BytesMut}; use chrono::{DateTime, SecondsFormat, SubsecRound, Utc}; use lookup::lookup_v2::ConfigTargetPath; -use std::collections::HashMap; +use serde_json; +use std::borrow::Cow; +use std::collections::BTreeMap; use std::fmt::Write; use std::str::FromStr; use strum::{EnumString, FromRepr, VariantNames}; use tokio_util::codec::Encoder; +use tracing::debug; use vector_config::configurable_component; use vector_core::{ config::DataType, @@ -83,11 +86,8 @@ impl Encoder for SyslogSerializer { fn encode(&mut self, event: Event, buffer: &mut BytesMut) -> Result<(), Self::Error> { if let Event::Log(log_event) = event { let syslog_message = ConfigDecanter::new(&log_event).decant_config(&self.config.syslog); - let vec = syslog_message - .encode(&self.config.syslog.rfc) - .as_bytes() - .to_vec(); - buffer.put_slice(&vec); + let encoded = syslog_message.encode(&self.config.syslog.rfc); + buffer.put_slice(encoded.as_bytes()); } Ok(()) @@ -116,19 +116,24 @@ impl<'a> ConfigDecanter<'a> { }); let mut proc_id = self.get_value(&config.proc_id); let mut msg_id = self.get_value(&config.msg_id); - if config.rfc 
== SyslogRFC::Rfc5424 { - if app_name.len() > 48 { - app_name.truncate(48); - } - if let Some(pid) = &mut proc_id - && pid.len() > 128 - { - pid.truncate(128); + + match config.rfc { + SyslogRFC::Rfc3164 => { + // RFC 3164: TAG field (app_name and proc_id) must be ASCII printable + app_name = sanitize_to_ascii(&app_name).into_owned(); + if let Some(pid) = &mut proc_id { + *pid = sanitize_to_ascii(pid).into_owned(); + } } - if let Some(mid) = &mut msg_id - && mid.len() > 32 - { - mid.truncate(32); + SyslogRFC::Rfc5424 => { + // Truncate to character limits (not byte limits to avoid UTF-8 panics) + truncate_chars(&mut app_name, 48); + if let Some(pid) = &mut proc_id { + truncate_chars(pid, 128); + } + if let Some(mid) = &mut msg_id { + truncate_chars(mid, 32); + } } } @@ -218,6 +223,76 @@ impl<'a> ConfigDecanter<'a> { const NIL_VALUE: &str = "-"; const SYSLOG_V1: &str = "1"; const RFC3164_TAG_MAX_LENGTH: usize = 32; +const SD_ID_MAX_LENGTH: usize = 32; + +/// Replaces invalid characters with '_' +#[inline] +fn sanitize_with(s: &str, is_valid: F) -> Cow<'_, str> +where + F: Fn(char) -> bool, +{ + match s.char_indices().find(|(_, c)| !is_valid(*c)) { + None => Cow::Borrowed(s), // All valid, zero allocation + Some((first_invalid_idx, _)) => { + let mut result = String::with_capacity(s.len()); + result.push_str(&s[..first_invalid_idx]); // Copy valid prefix + for c in s[first_invalid_idx..].chars() { + result.push(if is_valid(c) { c } else { '_' }); + } + + Cow::Owned(result) + } + } +} + +/// Sanitize a string to ASCII printable characters (space to tilde, ASCII 32-126) +/// Used for RFC 3164 TAG field (app_name and proc_id) +/// Invalid characters are replaced with '_' +#[inline] +fn sanitize_to_ascii(s: &str) -> Cow<'_, str> { + sanitize_with(s, |c| (' '..='~').contains(&c)) +} + +/// Sanitize SD-ID or PARAM-NAME according to RFC 5424 +/// Per RFC 5424, these NAMES must only contain printable ASCII (33-126) +/// excluding '=', ' ', ']', '"' +/// Invalid characters 
are replaced with '_' +#[inline] +fn sanitize_name(name: &str) -> Cow<'_, str> { + sanitize_with(name, |c| { + c.is_ascii_graphic() && !matches!(c, '=' | ']' | '"') + }) +} + +/// Escape PARAM-VALUE according to RFC 5424 +fn escape_sd_value(s: &str) -> Cow<'_, str> { + let needs_escaping = s.chars().any(|c| matches!(c, '\\' | '"' | ']')); + + if !needs_escaping { + return Cow::Borrowed(s); + } + + let mut result = String::with_capacity(s.len() + 10); + for ch in s.chars() { + match ch { + '\\' => result.push_str("\\\\"), + '"' => result.push_str("\\\""), + ']' => result.push_str("\\]"), + _ => result.push(ch), + } + } + + Cow::Owned(result) +} + +/// Safely truncate a string to a maximum number of characters (not bytes!) +/// This avoids panics when truncating at a multi-byte UTF-8 character boundary +/// Optimized to iterate only through necessary characters (not the entire string) +fn truncate_chars(s: &mut String, max_chars: usize) { + if let Some((byte_idx, _)) = s.char_indices().nth(max_chars) { + s.truncate(byte_idx); + } +} /// The syslog RFC standard to use for formatting. 
#[configurable_component] @@ -243,59 +318,68 @@ struct SyslogMessage { impl SyslogMessage { fn encode(&self, rfc: &SyslogRFC) -> String { - let pri_header = self.pri.encode(); + let mut result = String::with_capacity(256); - let mut parts = Vec::new(); + let _ = write!(result, "{}", self.pri.encode()); - let timestamp_str = match rfc { - SyslogRFC::Rfc3164 => self.timestamp.format("%b %e %H:%M:%S").to_string(), - SyslogRFC::Rfc5424 => self - .timestamp - .round_subsecs(6) - .to_rfc3339_opts(SecondsFormat::Micros, true), - }; - parts.push(timestamp_str); - parts.push(self.hostname.as_deref().unwrap_or(NIL_VALUE).to_string()); + if *rfc == SyslogRFC::Rfc5424 { + result.push_str(SYSLOG_V1); + result.push(' '); + } - let tag_str = match rfc { - SyslogRFC::Rfc3164 => self.tag.encode_rfc_3164(), - SyslogRFC::Rfc5424 => self.tag.encode_rfc_5424(), - }; - parts.push(tag_str); + match rfc { + SyslogRFC::Rfc3164 => { + let _ = write!(result, "{} ", self.timestamp.format("%b %e %H:%M:%S")); + } + SyslogRFC::Rfc5424 => { + result.push_str( + &self + .timestamp + .round_subsecs(6) + .to_rfc3339_opts(SecondsFormat::Micros, true), + ); + result.push(' '); + } + } - let mut message_part = self.message.clone(); - if *rfc == SyslogRFC::Rfc3164 { - message_part = Self::sanitize_rfc3164_message(&message_part); + result.push_str(self.hostname.as_deref().unwrap_or(NIL_VALUE)); + result.push(' '); + + match rfc { + SyslogRFC::Rfc3164 => result.push_str(&self.tag.encode_rfc_3164()), + SyslogRFC::Rfc5424 => result.push_str(&self.tag.encode_rfc_5424()), } + result.push(' '); - if let Some(sd) = &self.structured_data { - let sd_string = sd.encode(); - if *rfc == SyslogRFC::Rfc3164 { - if !sd.elements.is_empty() { - if !message_part.is_empty() { - message_part = format!("{sd_string} {message_part}"); - } else { - message_part = sd_string; - } - } + if *rfc == SyslogRFC::Rfc3164 { + // RFC 3164 does not support structured data + if let Some(sd) = &self.structured_data + && 
!sd.elements.is_empty() + { + debug!( + "Structured data present but ignored - RFC 3164 does not support structured data. Consider using RFC 5424 instead." + ); + } + } else { + if let Some(sd) = &self.structured_data { + result.push_str(&sd.encode()); } else { - parts.push(sd_string); + result.push_str(NIL_VALUE); + } + if !self.message.is_empty() { + result.push(' '); } - } else if *rfc == SyslogRFC::Rfc5424 { - parts.push(NIL_VALUE.to_string()); } - if !message_part.is_empty() { - parts.push(message_part); + if !self.message.is_empty() { + if *rfc == SyslogRFC::Rfc3164 { + result.push_str(&Self::sanitize_rfc3164_message(&self.message)); + } else { + result.push_str(&self.message); + } } - let main_message = parts.join(" "); - - if *rfc == SyslogRFC::Rfc5424 { - format!("{pri_header}{SYSLOG_V1} {main_message}") - } else { - format!("{pri_header}{main_message}") - } + result } fn sanitize_rfc3164_message(message: &str) -> String { @@ -320,8 +404,8 @@ impl Tag { } else { format!("{}:", self.app_name) }; - if tag.len() > RFC3164_TAG_MAX_LENGTH { - tag.truncate(RFC3164_TAG_MAX_LENGTH); + if tag.chars().count() > RFC3164_TAG_MAX_LENGTH { + truncate_chars(&mut tag, RFC3164_TAG_MAX_LENGTH); if !tag.ends_with(':') { tag.pop(); tag.push(':'); @@ -337,7 +421,7 @@ impl Tag { } } -type StructuredDataMap = HashMap>; +type StructuredDataMap = BTreeMap>; #[derive(Debug, Default)] struct StructuredData { elements: StructuredDataMap, @@ -353,7 +437,7 @@ impl StructuredData { .fold(String::new(), |mut acc, (sd_id, sd_params)| { let _ = write!(acc, "[{sd_id}"); for (key, value) in sd_params { - let esc_val = Self::escape_sd(value); + let esc_val = escape_sd_value(value); let _ = write!(acc, " {key}=\"{esc_val}\""); } let _ = write!(acc, "]"); @@ -361,31 +445,72 @@ impl StructuredData { }) } } - - fn escape_sd(s: &str) -> String { - s.replace('\\', "\\\\") - .replace('"', "\\\"") - .replace(']', "\\]") - } } impl From for StructuredData { fn from(fields: ObjectMap) -> Self { let 
elements = fields .into_iter() - .flat_map(|(sd_id, value)| { - let sd_params = value - .into_object()? - .into_iter() - .map(|(k, v)| (k.into(), v.to_string_lossy().to_string())) - .collect(); - Some((sd_id.into(), sd_params)) + .map(|(sd_id, value)| { + let sd_id_str: String = sd_id.into(); + let sanitized_id = sanitize_name(&sd_id_str); + + let final_id = if sanitized_id.chars().count() > SD_ID_MAX_LENGTH { + sanitized_id.chars().take(SD_ID_MAX_LENGTH).collect() + } else { + sanitized_id.into_owned() + }; + + let sd_params = match value { + Value::Object(obj) => { + let mut map = BTreeMap::new(); + flatten_object(obj, String::new(), &mut map); + map + } + scalar => { + let mut map = BTreeMap::new(); + map.insert("value".to_string(), scalar.to_string_lossy().to_string()); + map + } + }; + (final_id, sd_params) }) .collect(); Self { elements } } } +/// Helper function to flatten nested objects with dot notation +fn flatten_object(obj: ObjectMap, prefix: String, result: &mut BTreeMap) { + for (key, value) in obj { + let key_str: String = key.into(); + + let sanitized_key = sanitize_name(&key_str); + + let mut full_key = prefix.clone(); + if !full_key.is_empty() { + full_key.push('.'); + } + full_key.push_str(&sanitized_key); + + match value { + Value::Object(nested) => { + flatten_object(nested, full_key, result); + } + Value::Array(arr) => { + if let Ok(json) = serde_json::to_string(&arr) { + result.insert(full_key, json); + } else { + result.insert(full_key, format!("{:?}", arr)); + } + } + scalar => { + result.insert(full_key, scalar.to_string_lossy().to_string()); + } + } + } +} + #[derive(Default, Debug)] struct Pri { facility: Facility, @@ -580,7 +705,8 @@ mod tests { .unwrap(); let log = create_test_log(); let output = run_encode(config, Event::Log(log)); - let expected = "<26>Aug 28 18:30:00 test-host.com my-app[12345]: [metrics retries=\"3\"] original message"; + // RFC 3164 does not support structured data, so it's ignored + let expected = "<26>Aug 28 
18:30:00 test-host.com my-app[12345]: original message"; assert_eq!(output, expected); } @@ -847,4 +973,235 @@ mod tests { let output = run_encode(config, event); assert!(output.contains("meaning-app - -")); } + + #[test] + fn test_structured_data_with_scalars() { + let config = toml::from_str::( + r#" + [syslog] + rfc = "rfc5424" + "#, + ) + .unwrap(); + + let mut log = create_simple_log(); + log.insert( + event_path!("structured_data"), + value!({"simple_string": "hello", "simple_number": 42}), + ); + + let output = run_encode(config, Event::Log(log)); + assert!(output.contains(r#"[simple_number value="42"]"#)); + assert!(output.contains(r#"[simple_string value="hello"]"#)); + } + + #[test] + fn test_structured_data_with_nested_objects() { + let config = toml::from_str::( + r#" + [syslog] + rfc = "rfc5424" + "#, + ) + .unwrap(); + + let mut log = create_simple_log(); + log.insert( + event_path!("structured_data"), + value!({ + "meta": { + "request": { + "id": "abc-123", + "method": "GET" + }, + "user": "bob" + } + }), + ); + + let output = run_encode(config, Event::Log(log)); + assert!(output.contains(r#"[meta request.id="abc-123" request.method="GET" user="bob"]"#)); + } + + #[test] + fn test_structured_data_with_arrays() { + let config = toml::from_str::( + r#" + [syslog] + rfc = "rfc5424" + "#, + ) + .unwrap(); + + let mut log = create_simple_log(); + log.insert( + event_path!("structured_data"), + value!({ + "data": { + "tags": ["tag1", "tag2", "tag3"] + } + }), + ); + + let output = run_encode(config, Event::Log(log)); + // Arrays should be JSON-encoded and escaped + assert!(output.contains(r#"[data tags="[\"tag1\",\"tag2\",\"tag3\"\]"]"#)); + } + + #[test] + fn test_structured_data_complex_nested() { + let config = toml::from_str::( + r#" + [syslog] + rfc = "rfc5424" + "#, + ) + .unwrap(); + + let mut log = create_simple_log(); + log.insert( + event_path!("structured_data"), + value!({ + "tracking": { + "session": { + "user": { + "id": "123", + "name": 
"alice" + }, + "duration_ms": 5000 + } + } + }), + ); + + let output = run_encode(config, Event::Log(log)); + assert!(output.contains(r#"session.duration_ms="5000""#)); + assert!(output.contains(r#"session.user.id="123""#)); + assert!(output.contains(r#"session.user.name="alice""#)); + } + + #[test] + fn test_structured_data_sanitization() { + let config = toml::from_str::( + r#" + [syslog] + rfc = "rfc5424" + "#, + ) + .unwrap(); + + let mut log = create_simple_log(); + log.insert( + event_path!("structured_data"), + value!({ + "my id": { // SD-ID with space - should be sanitized to my_id + "user=name": "alice", // PARAM-NAME with = - should be sanitized to user_name + "foo]bar": "value1", // PARAM-NAME with ] - should be sanitized to foo_bar + "has\"quote": "value2" // PARAM-NAME with " - should be sanitized to has_quote + } + }), + ); + + let output = run_encode(config, Event::Log(log)); + // All invalid characters should be replaced with _ + assert!(output.contains(r#"[my_id"#)); + assert!(output.contains(r#"foo_bar="value1""#)); + assert!(output.contains(r#"has_quote="value2""#)); + assert!(output.contains(r#"user_name="alice""#)); + } + + #[test] + fn test_structured_data_sd_id_length_limit() { + let config = toml::from_str::( + r#" + [syslog] + rfc = "rfc5424" + "#, + ) + .unwrap(); + + let mut log = create_simple_log(); + log.insert( + event_path!("structured_data"), + value!({ + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa": { + "key": "value" + } + }), + ); + + let output = run_encode(config, Event::Log(log)); + let expected_id = "a".repeat(32); + assert!(output.contains(&format!("[{}", expected_id))); + assert!(!output.contains(&format!("[{}", "a".repeat(50)))); + } + + #[test] + fn test_utf8_safe_truncation() { + let config = toml::from_str::( + r#" + [syslog] + rfc = "rfc5424" + app_name = ".app" + proc_id = ".proc" + msg_id = ".msg" + "#, + ) + .unwrap(); + + let mut log = create_simple_log(); + // Create fields with UTF-8 characters (emoji, 
Cyrillic, etc.) each emoji is 4 bytes + log.insert( + event_path!("app"), + "app_😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀", + ); + log.insert( + event_path!("proc"), + "процес_😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀", + ); + log.insert(event_path!("msg"), "довге_повідомлення "); + + log.insert( + event_path!("structured_data"), + value!({ + "_😀_дуже_довге_значення_більше_тридцати_двух_символів": { + "_😀_": "value" + } + }), + ); + let output = run_encode(config, Event::Log(log)); + assert!(output.starts_with("<14>1")); + assert!(output.contains("app_")); + + let expected_sd_id: String = "_".repeat(32); + assert!(output.contains(&format!("[{}", expected_sd_id))); + } + + #[test] + fn test_rfc3164_ascii_sanitization() { + let config = toml::from_str::( + r#" + [syslog] + rfc = "rfc3164" + app_name = ".app" + proc_id = ".proc" + "#, + ) + .unwrap(); + + let mut log = create_simple_log(); + // Use non-ASCII characters in app_name and proc_id + log.insert(event_path!("app"), "my_app_😀_тест"); + log.insert(event_path!("proc"), "процес_123"); + + let output = run_encode(config, Event::Log(log)); + + assert!(output.starts_with("<14>")); + assert!(output.contains("my_app_____")); + assert!(output.contains("[_______123]:")); + + assert!(!output.contains("😀")); + assert!(!output.contains("тест")); + assert!(!output.contains("процес")); + } } diff --git a/lib/vector-buffers/Cargo.toml b/lib/vector-buffers/Cargo.toml index 0c2fb5de4f0be..02b43c29e382f 100644 --- a/lib/vector-buffers/Cargo.toml +++ b/lib/vector-buffers/Cargo.toml @@ -23,7 +23,7 @@ futures.workspace = true memmap2 = { version = "0.9.10", default-features = false } metrics.workspace = true num-traits = { version = "0.2.19", default-features = false } -paste.workspace = true +pastey.workspace = true rkyv = { version = "0.7.46", default-features = false, features = ["size_32", "std", "strict", "validation"] } serde.workspace = true snafu.workspace = true diff --git a/lib/vector-common/Cargo.toml b/lib/vector-common/Cargo.toml index 
06f3a2f87790a..95098f19b1bd3 100644 --- a/lib/vector-common/Cargo.toml +++ b/lib/vector-common/Cargo.toml @@ -43,7 +43,7 @@ futures.workspace = true indexmap.workspace = true itertools.workspace = true metrics.workspace = true -paste.workspace = true +pastey.workspace = true pin-project.workspace = true serde.workspace = true serde_json.workspace = true diff --git a/lib/vector-common/src/internal_event/mod.rs b/lib/vector-common/src/internal_event/mod.rs index 369c4b9063c88..272e2344900e3 100644 --- a/lib/vector-common/src/internal_event/mod.rs +++ b/lib/vector-common/src/internal_event/mod.rs @@ -188,7 +188,7 @@ macro_rules! registered_event { $(fn register($fixed_name:ident: $fixed_tags:ty, $tags_name:ident: $tags:ty) $register_body:block)? ) => { - paste::paste!{ + pastey::paste!{ #[derive(Clone)] pub struct [<$event Handle>] { $( $field: $type, )* diff --git a/lib/vector-core/src/config/global_options.rs b/lib/vector-core/src/config/global_options.rs index 329cf1a58e006..d86df49803552 100644 --- a/lib/vector-core/src/config/global_options.rs +++ b/lib/vector-core/src/config/global_options.rs @@ -58,7 +58,7 @@ pub enum WildcardMatching { // // If this is modified, make sure those changes are reflected in the `ConfigBuilder::append` // function! -#[configurable_component(global_option("global_option"))] +#[configurable_component] #[derive(Clone, Debug, Default, PartialEq)] pub struct GlobalOptions { /// The directory used for persisting Vector state data. 
diff --git a/lib/vector-lib/Cargo.toml b/lib/vector-lib/Cargo.toml index 6c46f70a019bd..6539c82dfaa80 100644 --- a/lib/vector-lib/Cargo.toml +++ b/lib/vector-lib/Cargo.toml @@ -7,7 +7,7 @@ publish = false [dependencies] codecs = { path = "../codecs", default-features = false } -enrichment = { path = "../enrichment" } +enrichment = { path = "../vector-vrl/enrichment" } file-source = { path = "../file-source", optional = true } file-source-common = { path = "../file-source-common", optional = true } opentelemetry-proto = { path = "../opentelemetry-proto", optional = true } diff --git a/lib/dnstap-parser/Cargo.toml b/lib/vector-vrl/dnstap-parser/Cargo.toml similarity index 64% rename from lib/dnstap-parser/Cargo.toml rename to lib/vector-vrl/dnstap-parser/Cargo.toml index 8768274b3d3e0..6e30f22732fd9 100644 --- a/lib/dnstap-parser/Cargo.toml +++ b/lib/vector-vrl/dnstap-parser/Cargo.toml @@ -7,20 +7,19 @@ publish = false license = "MIT" [dependencies] -base64.workspace = true +base64 = { workspace = true, features = ["alloc"] } bytes = { workspace = true, features = ["serde"] } chrono.workspace = true -dnsmsg-parser = { path = "../dnsmsg-parser" } +dnsmsg-parser = { path = "../../dnsmsg-parser" } hickory-proto.workspace = true prost.workspace = true snafu.workspace = true tracing.workspace = true vector-config.workspace = true -vector-common = { path = "../vector-common" } -vector-lookup = { path = "../vector-lookup", features = ["test"] } -vector-core = { path = "../vector-core" } +vector-common = { path = "../../vector-common" } +vector-lookup = { path = "../../vector-lookup", features = ["test"] } +vector-core = { path = "../../vector-core" } vrl.workspace = true -paste.workspace = true [build-dependencies] prost-build.workspace = true @@ -28,3 +27,4 @@ prost-build.workspace = true [dev-dependencies] anyhow.workspace = true chrono-tz.workspace = true +pastey.workspace = true diff --git a/lib/dnstap-parser/build.rs b/lib/vector-vrl/dnstap-parser/build.rs similarity 
index 100% rename from lib/dnstap-parser/build.rs rename to lib/vector-vrl/dnstap-parser/build.rs diff --git a/lib/dnstap-parser/proto/dnstap.proto b/lib/vector-vrl/dnstap-parser/proto/dnstap.proto similarity index 100% rename from lib/dnstap-parser/proto/dnstap.proto rename to lib/vector-vrl/dnstap-parser/proto/dnstap.proto diff --git a/lib/dnstap-parser/src/internal_events.rs b/lib/vector-vrl/dnstap-parser/src/internal_events.rs similarity index 100% rename from lib/dnstap-parser/src/internal_events.rs rename to lib/vector-vrl/dnstap-parser/src/internal_events.rs diff --git a/lib/dnstap-parser/src/lib.rs b/lib/vector-vrl/dnstap-parser/src/lib.rs similarity index 100% rename from lib/dnstap-parser/src/lib.rs rename to lib/vector-vrl/dnstap-parser/src/lib.rs diff --git a/lib/dnstap-parser/src/parser.rs b/lib/vector-vrl/dnstap-parser/src/parser.rs similarity index 100% rename from lib/dnstap-parser/src/parser.rs rename to lib/vector-vrl/dnstap-parser/src/parser.rs diff --git a/lib/dnstap-parser/src/schema.rs b/lib/vector-vrl/dnstap-parser/src/schema.rs similarity index 100% rename from lib/dnstap-parser/src/schema.rs rename to lib/vector-vrl/dnstap-parser/src/schema.rs diff --git a/lib/dnstap-parser/src/vrl_functions/mod.rs b/lib/vector-vrl/dnstap-parser/src/vrl_functions/mod.rs similarity index 100% rename from lib/dnstap-parser/src/vrl_functions/mod.rs rename to lib/vector-vrl/dnstap-parser/src/vrl_functions/mod.rs diff --git a/lib/dnstap-parser/src/vrl_functions/parse_dnstap.rs b/lib/vector-vrl/dnstap-parser/src/vrl_functions/parse_dnstap.rs similarity index 100% rename from lib/dnstap-parser/src/vrl_functions/parse_dnstap.rs rename to lib/vector-vrl/dnstap-parser/src/vrl_functions/parse_dnstap.rs diff --git a/lib/enrichment/Cargo.toml b/lib/vector-vrl/enrichment/Cargo.toml similarity index 100% rename from lib/enrichment/Cargo.toml rename to lib/vector-vrl/enrichment/Cargo.toml diff --git a/lib/enrichment/LICENSE b/lib/vector-vrl/enrichment/LICENSE similarity 
index 100% rename from lib/enrichment/LICENSE rename to lib/vector-vrl/enrichment/LICENSE diff --git a/lib/enrichment/src/find_enrichment_table_records.rs b/lib/vector-vrl/enrichment/src/find_enrichment_table_records.rs similarity index 100% rename from lib/enrichment/src/find_enrichment_table_records.rs rename to lib/vector-vrl/enrichment/src/find_enrichment_table_records.rs diff --git a/lib/enrichment/src/get_enrichment_table_record.rs b/lib/vector-vrl/enrichment/src/get_enrichment_table_record.rs similarity index 100% rename from lib/enrichment/src/get_enrichment_table_record.rs rename to lib/vector-vrl/enrichment/src/get_enrichment_table_record.rs diff --git a/lib/enrichment/src/lib.rs b/lib/vector-vrl/enrichment/src/lib.rs similarity index 99% rename from lib/enrichment/src/lib.rs rename to lib/vector-vrl/enrichment/src/lib.rs index d538a05ca5bbd..df4575e91f08e 100644 --- a/lib/enrichment/src/lib.rs +++ b/lib/vector-vrl/enrichment/src/lib.rs @@ -158,7 +158,7 @@ pub(crate) const ENRICHMENT_TABLE_EXPLAINER: &str = indoc! {r#" This function returns the rows that match the provided condition(s). _All_ fields need to match for rows to be returned; if any fields do not match, then no rows are returned. - There are currently three forms of search criteria: + There are three forms of search criteria: 1. **Exact match search**. The given field must match the value exactly. Case sensitivity can be specified using the `case_sensitive` argument. 
An exact match search can use an diff --git a/lib/enrichment/src/tables.rs b/lib/vector-vrl/enrichment/src/tables.rs similarity index 100% rename from lib/enrichment/src/tables.rs rename to lib/vector-vrl/enrichment/src/tables.rs diff --git a/lib/enrichment/src/test_util.rs b/lib/vector-vrl/enrichment/src/test_util.rs similarity index 100% rename from lib/enrichment/src/test_util.rs rename to lib/vector-vrl/enrichment/src/test_util.rs diff --git a/lib/enrichment/src/vrl_util.rs b/lib/vector-vrl/enrichment/src/vrl_util.rs similarity index 100% rename from lib/enrichment/src/vrl_util.rs rename to lib/vector-vrl/enrichment/src/vrl_util.rs diff --git a/lib/vector-vrl/functions/Cargo.toml b/lib/vector-vrl/functions/Cargo.toml index 973c32e565aab..874ffbd759ad6 100644 --- a/lib/vector-vrl/functions/Cargo.toml +++ b/lib/vector-vrl/functions/Cargo.toml @@ -9,12 +9,12 @@ license = "MPL-2.0" [dependencies] indoc.workspace = true vrl.workspace = true -enrichment = { path = "../../enrichment" } -dnstap-parser = { path = "../../dnstap-parser", optional = true } -vector-vrl-metrics = { path = "../../vector-vrl-metrics", optional = true } +enrichment = { path = "../enrichment" } +dnstap-parser = { path = "../dnstap-parser", optional = true } +vector-vrl-metrics = { path = "../metrics", optional = true } vector-vrl-category.workspace = true [features] -default = [] +default = ["dnstap", "vrl-metrics"] dnstap = ["dep:dnstap-parser"] vrl-metrics = ["dep:vector-vrl-metrics"] diff --git a/lib/vector-vrl/functions/src/lib.rs b/lib/vector-vrl/functions/src/lib.rs index afb2f3ce3d43a..d3846912aaba8 100644 --- a/lib/vector-vrl/functions/src/lib.rs +++ b/lib/vector-vrl/functions/src/lib.rs @@ -38,9 +38,19 @@ pub fn secret_functions() -> Vec> { /// Returns all VRL functions available in Vector. 
#[allow(clippy::disallowed_methods)] pub fn all() -> Vec> { - let functions = vrl::stdlib::all() + let functions = iter_all_without_vrl_stdlib().chain(vrl::stdlib::all()); + functions.collect() +} + +/// Returns all VRL functions available only in Vector. +pub fn all_without_vrl_stdlib() -> Vec> { + let functions = iter_all_without_vrl_stdlib(); + functions.collect() +} + +fn iter_all_without_vrl_stdlib() -> impl Iterator> { + let functions = secret_functions() .into_iter() - .chain(secret_functions()) .chain(enrichment::vrl_functions()); #[cfg(feature = "dnstap")] @@ -49,5 +59,5 @@ pub fn all() -> Vec> { #[cfg(feature = "vrl-metrics")] let functions = functions.chain(vector_vrl_metrics::all()); - functions.collect() + functions } diff --git a/lib/vector-vrl-metrics/Cargo.toml b/lib/vector-vrl/metrics/Cargo.toml similarity index 67% rename from lib/vector-vrl-metrics/Cargo.toml rename to lib/vector-vrl/metrics/Cargo.toml index 6f6a30b66c364..38c29ceaf8ceb 100644 --- a/lib/vector-vrl-metrics/Cargo.toml +++ b/lib/vector-vrl/metrics/Cargo.toml @@ -10,8 +10,8 @@ license = "MPL-2.0" arc-swap.workspace = true const-str.workspace = true vrl.workspace = true -vector-core = { path = "../vector-core", default-features = false, features = ["vrl"] } -vector-common = { path = "../vector-common", default-features = false } +vector-core = { path = "../../vector-core", default-features = false, features = ["vrl"] } +vector-common = { path = "../../vector-common", default-features = false } tokio.workspace = true tokio-stream.workspace = true vector-vrl-category.workspace = true diff --git a/lib/vector-vrl-metrics/src/aggregate_vector_metrics.rs b/lib/vector-vrl/metrics/src/aggregate_vector_metrics.rs similarity index 97% rename from lib/vector-vrl-metrics/src/aggregate_vector_metrics.rs rename to lib/vector-vrl/metrics/src/aggregate_vector_metrics.rs index 090d51f8a864d..645334cbccc5e 100644 --- a/lib/vector-vrl-metrics/src/aggregate_vector_metrics.rs +++ 
b/lib/vector-vrl/metrics/src/aggregate_vector_metrics.rs @@ -14,7 +14,7 @@ use crate::common::{Error, MetricsStorage}; static DEFAULT_TAGS: LazyLock = LazyLock::new(|| Value::Object(BTreeMap::new())); static PARAMETERS: LazyLock> = LazyLock::new(|| { vec![ - Parameter::required("function", kind::BYTES, "The metric name to search.") + Parameter::required("function", kind::BYTES, "The aggregation function to apply to the matched metrics.") .enum_variants(&[ EnumVariant { value: "sum", @@ -125,7 +125,7 @@ impl Function for AggregateVectorMetrics { }, example! { title: "Min of vector internal metrics matching the name", - source: r#"aggregate_vector_metrics("max", "utilization")"#, + source: r#"aggregate_vector_metrics("min", "utilization")"#, result: Ok("0.5"), }, ] diff --git a/lib/vector-vrl-metrics/src/common.rs b/lib/vector-vrl/metrics/src/common.rs similarity index 100% rename from lib/vector-vrl-metrics/src/common.rs rename to lib/vector-vrl/metrics/src/common.rs diff --git a/lib/vector-vrl-metrics/src/find_vector_metrics.rs b/lib/vector-vrl/metrics/src/find_vector_metrics.rs similarity index 100% rename from lib/vector-vrl-metrics/src/find_vector_metrics.rs rename to lib/vector-vrl/metrics/src/find_vector_metrics.rs diff --git a/lib/vector-vrl-metrics/src/get_vector_metric.rs b/lib/vector-vrl/metrics/src/get_vector_metric.rs similarity index 100% rename from lib/vector-vrl-metrics/src/get_vector_metric.rs rename to lib/vector-vrl/metrics/src/get_vector_metric.rs diff --git a/lib/vector-vrl-metrics/src/lib.rs b/lib/vector-vrl/metrics/src/lib.rs similarity index 100% rename from lib/vector-vrl-metrics/src/lib.rs rename to lib/vector-vrl/metrics/src/lib.rs diff --git a/lib/vector-vrl/tests/Cargo.toml b/lib/vector-vrl/tests/Cargo.toml index 411174e96068d..ca477616e3b43 100644 --- a/lib/vector-vrl/tests/Cargo.toml +++ b/lib/vector-vrl/tests/Cargo.toml @@ -7,11 +7,11 @@ publish = false [dependencies] chrono-tz.workspace = true -vector-vrl-functions = { workspace = 
true, features = ["dnstap"] } -enrichment = { path = "../../enrichment" } -vector-vrl-metrics = { path = "../../vector-vrl-metrics" } +vector-vrl-functions = { workspace = true, features = ["dnstap", "vrl-metrics"] } +enrichment = { path = "../enrichment" } +vector-vrl-metrics = { path = "../metrics" } vector-core = { path = "../../vector-core", default-features = false, features = ["vrl"] } -vrl.workspace = true +vrl = { workspace = true, features = ["stdlib"] } clap.workspace = true glob.workspace = true diff --git a/lib/vector-vrl/tests/src/docs.rs b/lib/vector-vrl/tests/src/docs.rs index e55385c2ffd06..8c25745e5aefc 100644 --- a/lib/vector-vrl/tests/src/docs.rs +++ b/lib/vector-vrl/tests/src/docs.rs @@ -192,6 +192,7 @@ fn test_from_cue_example(category: &'static str, name: String, example: Example) skip, read_only_paths: vec![], check_diagnostics: false, + check_type_only: false, source_file: format!("website/cue/reference/remap/functions/{name}.cue"), source_line: 1, } diff --git a/lib/vector-vrl/tests/src/main.rs b/lib/vector-vrl/tests/src/main.rs index f205efb7f4452..4d3b54049327a 100644 --- a/lib/vector-vrl/tests/src/main.rs +++ b/lib/vector-vrl/tests/src/main.rs @@ -95,6 +95,7 @@ fn main() { timings: cmd.timings, runtime: cmd.runtime, timezone: cmd.timezone(), + run_skipped: false, }; run_tests( diff --git a/lib/vector-vrl/web-playground/Cargo.toml b/lib/vector-vrl/web-playground/Cargo.toml index 85cbff4e78beb..d84b2c8d69ad4 100644 --- a/lib/vector-vrl/web-playground/Cargo.toml +++ b/lib/vector-vrl/web-playground/Cargo.toml @@ -17,8 +17,8 @@ vrl.workspace = true serde.workspace = true web-sys = { version = "0.3", features = ["Window", "Performance"] } gloo-utils = { version = "0.2", features = ["serde"] } -vector-vrl-functions = { workspace = true, features = [] } -enrichment = { path = "../../enrichment" } +vector-vrl-functions.workspace = true +enrichment = { path = "../enrichment" } # Required per 
https://docs.rs/getrandom/latest/getrandom/#webassembly-support getrandom = { version = "0.2.15", features = ["js"] } diff --git a/regression/config.yaml b/regression/config.yaml index b9b54de25b372..e7a3f5b1e5dab 100644 --- a/regression/config.yaml +++ b/regression/config.yaml @@ -1,5 +1,5 @@ lading: - version: 0.25.4 + version: 0.31.2 target: diff --git a/scripts/build-docker.sh b/scripts/build-docker.sh index cd0474cf6bf89..fb1a0ab13b065 100755 --- a/scripts/build-docker.sh +++ b/scripts/build-docker.sh @@ -10,8 +10,10 @@ set -euo pipefail set -x -CHANNEL="${CHANNEL:-"$(cargo vdev release channel)"}" -VERSION="${VECTOR_VERSION:-"$(cargo vdev version)"}" +vdev_cmd="${VDEV:-cargo vdev}" + +CHANNEL="${CHANNEL:-"$($vdev_cmd release channel)"}" +VERSION="${VECTOR_VERSION:-"$($vdev_cmd version)"}" DATE="${DATE:-"$(date -u +%Y-%m-%d)"}" PLATFORM="${PLATFORM:-}" PUSH="${PUSH:-"true"}" diff --git a/scripts/build.sh b/scripts/build.sh index 26198e9a49f58..874e6ed69a5b2 100755 --- a/scripts/build.sh +++ b/scripts/build.sh @@ -24,7 +24,7 @@ FEATURES="${FEATURES:-"default"}" NATIVE_BUILD="${NATIVE_BUILD:-"true"}" TARGET="${TARGET:?"You must specify a target triple, ex: arm64-apple-darwin"}" -CHANNEL=${CHANNEL:-"$(cargo vdev release channel)"} +CHANNEL=${CHANNEL:-"$(${VDEV:-cargo vdev} release channel)"} if [ "$CHANNEL" == "nightly" ]; then FEATURES="$FEATURES nightly" fi diff --git a/scripts/cue.sh b/scripts/cue.sh index 6a1b499a1afe5..5bf6ba6e67044 100755 --- a/scripts/cue.sh +++ b/scripts/cue.sh @@ -16,7 +16,7 @@ list-docs-files() { } cmd_check() { - cargo vdev check docs + ${VDEV:-cargo vdev} check docs } cmd_list() { @@ -24,7 +24,8 @@ cmd_list() { } cmd_fmt() { - list-docs-files | xargs cue fmt "$@" + # Ignore JSON-style cue files generated from VRL source code + list-docs-files | grep -v "${CUE_SOURCES}/reference/remap/functions/" | xargs cue fmt "$@" } cmd_vet() { diff --git a/scripts/environment/Dockerfile b/scripts/environment/Dockerfile index 
2f5cc8e91d737..fa7d72075dae6 100644 --- a/scripts/environment/Dockerfile +++ b/scripts/environment/Dockerfile @@ -1,4 +1,4 @@ -FROM docker.io/ubuntu:24.04 +FROM docker.io/ubuntu:24.04@sha256:d1e2e92c075e5ca139d51a140fff46f84315c0fdce203eab2807c7e495eff4f9 ENV DEBIAN_FRONTEND=noninteractive \ TZ='America/New York' \ PATH=/root/.cargo/bin:/root/.local/bin/:$PATH \ diff --git a/scripts/environment/bootstrap-windows-2025.ps1 b/scripts/environment/bootstrap-windows-2025.ps1 index ca7a26a600739..862efc554a187 100644 --- a/scripts/environment/bootstrap-windows-2025.ps1 +++ b/scripts/environment/bootstrap-windows-2025.ps1 @@ -2,6 +2,29 @@ $ErrorActionPreference = "Stop" Set-StrictMode -Version Latest +# Helper function to install choco packages with exponential backoff retry +function Install-ChocoPackage { + param( + [string]$Package, + [int]$MaxRetries = 5 + ) + + for ($attempt = 1; $attempt -le $MaxRetries; $attempt++) { + choco install $Package --execution-timeout=7200 -y + if ($LASTEXITCODE -eq 0) { + return + } + + if ($attempt -lt $MaxRetries) { + $delay = 5 * [math]::Pow(2, $attempt) # Exponential: 10, 20, 40, 80 seconds + Write-Host "choco install $Package failed (attempt $attempt of $MaxRetries). Retrying in $delay seconds..." + Start-Sleep -Seconds $delay + } else { + throw "choco install $Package failed after $MaxRetries attempts" + } + } +} + # Set up our Cargo path so we can do Rust-y things. echo "$HOME\.cargo\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append @@ -11,11 +34,9 @@ if ($env:RELEASE_BUILDER -ne "true") { bash scripts/environment/prepare.sh --modules=rustup } -# Enable retries to avoid transient network issues. -$env:NUGET_ENABLE_ENHANCED_HTTP_RETRY = "true" - -choco install make -choco install protoc +# Install Chocolatey packages with exponential backoff retry +Install-ChocoPackage "make" +Install-ChocoPackage "protoc" # Set a specific override path for libclang. 
echo "LIBCLANG_PATH=$( (gcm clang).source -replace "clang.exe" )" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append diff --git a/scripts/environment/install-protoc.sh b/scripts/environment/install-protoc.sh index dc0eaa09037a2..4d1debe6c3251 100755 --- a/scripts/environment/install-protoc.sh +++ b/scripts/environment/install-protoc.sh @@ -63,4 +63,4 @@ install_protoc() { mv -f -v "${TMP_DIR}/bin/protoc" "${install_path}" } -install_protoc "3.20.2" "${INSTALL_PATH}/protoc" +install_protoc "21.12" "${INSTALL_PATH}/protoc" diff --git a/scripts/environment/prepare.sh b/scripts/environment/prepare.sh index 1becf54d13552..6cd3349a61c58 100755 --- a/scripts/environment/prepare.sh +++ b/scripts/environment/prepare.sh @@ -37,8 +37,8 @@ CARGO_HACK_VERSION="0.6.43" DD_RUST_LICENSE_TOOL_VERSION="1.0.5" WASM_PACK_VERSION="0.13.1" MARKDOWNLINT_VERSION="0.45.0" -DATADOG_CI_VERSION="5.8.0" -VDEV_VERSION="0.1.0" +DATADOG_CI_VERSION="5.9.0" +VDEV_VERSION="0.3.0" ALL_MODULES=( rustup diff --git a/scripts/generate-component-docs.rb b/scripts/generate-component-docs.rb index 5506212d1b30c..9f5fa7f695434 100755 --- a/scripts/generate-component-docs.rb +++ b/scripts/generate-component-docs.rb @@ -781,6 +781,16 @@ def resolve_schema(root_schema, schema) resolved['required'] = is_required_field end + # Resolve any warnings attached to this option. + # + # Warnings can be specified in Rust via `#[configurable(metadata(docs::warnings = "..."))]`. + # Multiple warnings can be specified by repeating the attribute, and they will be emitted as an + # array in the CUE output. + warnings = get_schema_metadata(schema, 'docs::warnings') + if !warnings.nil? + resolved['warnings'] = warnings.is_a?(Array) ? warnings : [warnings] + end + # Reconcile the resolve schema, which essentially gives us a chance to, once the schema is # entirely resolved, check it for logical inconsistencies, fix up anything that we reasonably can, # and so on. 
@@ -1722,44 +1732,134 @@ def render_and_import_component_schema(root_schema, schema_name, component_type, ) end -def render_and_import_generated_api_schema(root_schema, apis) - api_schema = {} - apis.each do |component_name, schema_name| - friendly_name = "'#{component_name}' #{schema_name} configuration" - resolved_schema = unwrap_resolved_schema(root_schema, schema_name, friendly_name) - api_schema[component_name] = resolved_schema +def render_and_import_generated_top_level_config_schema(root_schema) + top_level_config_schema = {} + + # Define logical groupings for top-level configuration fields + # These groups will be used to organize separate documentation pages + field_groups = { + # Pipeline component containers + 'sources' => 'pipeline_components', + 'transforms' => 'pipeline_components', + 'sinks' => 'pipeline_components', + 'enrichment_tables' => 'pipeline_components', + + # Individual feature pages + 'api' => 'api', + 'schema' => 'schema', + 'log_schema' => 'schema', + 'secret' => 'secrets', + + # Global options (everything else defaults to this) + } + + group_metadata = { + 'global_options' => { + 'title' => 'Global Options', + 'description' => 'Global configuration options that apply to Vector as a whole.', + 'order' => 1 + }, + 'pipeline_components' => { + 'title' => 'Pipeline Components', + 'description' => 'Configure sources, transforms, sinks, and enrichment tables for your observability pipeline.', + 'order' => 2 + }, + 'api' => { + 'title' => 'API', + 'description' => 'Configure Vector\'s observability API.', + 'order' => 3 + }, + 'schema' => { + 'title' => 'Schema', + 'description' => 'Configure Vector\'s internal schema system for type tracking and validation.', + 'order' => 4 + }, + 'secrets' => { + 'title' => 'Secrets', + 'description' => 'Configure secrets management for secure configuration.', + 'order' => 5 + } + } + + # Usage of #[serde(flatten)] creates multiple schemas in the `allOf` array: + # - One or more schemas contain 
ConfigBuilder's direct fields + # - One or more schemas contain flattened GlobalOptions fields + all_of_schemas = root_schema['allOf'] || [] + + if all_of_schemas.empty? + @logger.error "Could not find ConfigBuilder allOf schemas in root schema" + return end - render_and_import_schema( - api_schema, - "configuration", - ["generated", "api"], - "generated/api.cue" - ) -end + # Collect all properties from all allOf schemas into a single hash. + # Since ConfigBuilder uses #[serde(flatten)], field names are unique across all schemas. + all_properties = all_of_schemas.reduce({}) do |acc, schema| + acc.merge(schema['properties'] || {}) + end -def render_and_import_generated_global_option_schema(root_schema, global_options) - global_option_schema = {} + @logger.info "[*] Found #{all_properties.keys.length} total properties across #{all_of_schemas.length} allOf schemas" - global_options.each do |component_name, schema_name| - friendly_name = "'#{component_name}' #{schema_name} configuration" + # Process each property once + all_properties.each do |field_name, field_schema| + # Skip fields marked with docs::hidden + metadata = field_schema['_metadata'] || {} + if metadata['docs::hidden'] + @logger.info "[*] Skipping '#{field_name}' (marked as docs::hidden)" + next + end - if component_name == "global_option" - # Flattening global options - unwrap_resolved_schema(root_schema, schema_name, friendly_name) - .each { |name, schema| global_option_schema[name] = schema } + # Extract and resolve the field + @logger.info "[*] Extracting '#{field_name}' field from ConfigBuilder..." 
+ resolved_field = resolve_schema(root_schema, field_schema) + + # Assign group metadata to organize the documentation + if field_groups.key?(field_name) + group_name = field_groups[field_name] + resolved_field['group'] = group_name + @logger.debug "Assigned '#{field_name}' to group '#{group_name}'" else - # Resolving and assigning other global options - global_option_schema[component_name] = resolve_schema_by_name(root_schema, schema_name) + # Default to global_options for any fields not explicitly grouped + resolved_field['group'] = 'global_options' + @logger.debug "Assigned '#{field_name}' to default group 'global_options'" end + + top_level_config_schema[field_name] = resolved_field + @logger.info "[✓] Resolved '#{field_name}'" end - render_and_import_schema( - global_option_schema, - "configuration", - ["generated", "configuration"], - "generated/configuration.cue" - ) + # Build the final data structure with both configuration and group metadata + friendly_name = "configuration" + config_map_path = ["generated", "configuration"] + cue_relative_path = "generated/configuration.cue" + + # Set up the structure for the value based on the configuration map path + data = {} + last = data + config_map_path.each do |segment| + last[segment] = {} if last[segment].nil? + last = last[segment] + end + + # Add both the configuration schema and the group metadata + last['configuration'] = top_level_config_schema + last['groups'] = group_metadata + + config_map_path.prepend('config-schema-base') + tmp_file_prefix = config_map_path.join('-') + final_json = to_pretty_json(data) + + # Write the resolved schema as JSON + json_output_file = write_to_temp_file(["config-schema-#{tmp_file_prefix}-", '.json'], final_json) + @logger.info "[✓] Wrote #{friendly_name} schema to '#{json_output_file}'. (#{final_json.length} bytes)" + + # Import it as Cue + @logger.info "[*] Importing #{friendly_name} schema as Cue file..." 
+ cue_output_file = "website/cue/reference/#{cue_relative_path}" + unless system(@cue_binary_path, 'import', '-f', '-o', cue_output_file, '-p', 'metadata', json_output_file) + @logger.error "[!] Failed to import #{friendly_name} schema as valid Cue." + exit 1 + end + @logger.info "[✓] Imported #{friendly_name} schema to '#{cue_output_file}'." end if ARGV.empty? @@ -1808,22 +1908,7 @@ def render_and_import_generated_global_option_schema(root_schema, global_options end end -apis = root_schema['definitions'].filter_map do |key, definition| - component_type = get_schema_metadata(definition, 'docs::component_type') - component_name = get_schema_metadata(definition, 'docs::component_name') - { component_name => key } if component_type == "api" -end -.reduce { |acc, item| nested_merge(acc, item) } - -render_and_import_generated_api_schema(root_schema, apis) - - -# At last, we generate the global options configuration. -global_options = root_schema['definitions'].filter_map do |key, definition| - component_type = get_schema_metadata(definition, 'docs::component_type') - component_name = get_schema_metadata(definition, 'docs::component_name') - { component_name => key } if component_type == "global_option" -end -.reduce { |acc, item| nested_merge(acc, item) } - -render_and_import_generated_global_option_schema(root_schema, global_options) +# Finally, generate the top-level Vector configuration schema. We extract ALL top-level config fields directly from the +# ConfigBuilder struct (defined in src/config/builder.rs) by processing its allOf schemas. ConfigBuilder is the single +# source of truth for what's actually allowed at the top level of Vector's configuration file. 
+render_and_import_generated_top_level_config_schema(root_schema) diff --git a/scripts/package-archive.sh b/scripts/package-archive.sh index 50171611fe5a7..1f1d48586e3b1 100755 --- a/scripts/package-archive.sh +++ b/scripts/package-archive.sh @@ -24,7 +24,7 @@ OVERWRITE=${OVERWRITE:-"true"} ARCHIVE_TYPE="${ARCHIVE_TYPE:-"tar.gz"}" NATIVE_BUILD="${NATIVE_BUILD:-"true"}" TARGET="${TARGET:?"You must specify a target triple, ex: arm64-apple-darwin"}" -ARCHIVE_VERSION="${VECTOR_VERSION:-"$(cargo vdev version)"}" +ARCHIVE_VERSION="${VECTOR_VERSION:-"$(${VDEV:-cargo vdev} version)"}" # # Local Vars diff --git a/scripts/package-deb.sh b/scripts/package-deb.sh index db230998b5851..ede151f290b3b 100755 --- a/scripts/package-deb.sh +++ b/scripts/package-deb.sh @@ -19,7 +19,7 @@ TARGET="${TARGET:?"You must specify a target triple, ex: arm64-apple-darwin"}" # PROJECT_ROOT="$(pwd)" -PACKAGE_VERSION="${VECTOR_VERSION:-"$(cargo vdev version)"}" +PACKAGE_VERSION="${VECTOR_VERSION:-"$(${VDEV:-cargo vdev} version)"}" ARCHIVE_NAME="vector-${PACKAGE_VERSION}-$TARGET.tar.gz" ARCHIVE_PATH="target/artifacts/$ARCHIVE_NAME" ABSOLUTE_ARCHIVE_PATH="$PROJECT_ROOT/$ARCHIVE_PATH" @@ -71,11 +71,6 @@ cat LICENSE NOTICE >"$PROJECT_ROOT/target/debian-license.txt" # --no-build # because this step should follow a build -# TODO: Remove this after the Vector docker image contains a newer cargo-deb version. -# Temporary override of cargo-deb to support Rust 2024 edition. -if [[ "$(cargo-deb --version 2>/dev/null)" != "2.9.3" ]]; then - cargo install cargo-deb --version 2.9.3 --force --locked -fi cargo deb --target "$TARGET" --deb-version "${PACKAGE_VERSION}-1" --variant "$TARGET" --no-build --no-strip # Rename the resulting .deb file to remove TARGET from name. 
diff --git a/scripts/package-msi.sh b/scripts/package-msi.sh index 3c62b344dcdb9..1eba2a331af87 100644 --- a/scripts/package-msi.sh +++ b/scripts/package-msi.sh @@ -9,7 +9,9 @@ set -euo pipefail set -x -ARCHIVE_VERSION="${VECTOR_VERSION:-"$(cargo vdev version)"}" +vdev_cmd="${VDEV:-cargo vdev}" + +ARCHIVE_VERSION="${VECTOR_VERSION:-"$($vdev_cmd version)"}" rm -rf target/msi-x64 cp -R distribution/msi target/msi-x64 @@ -24,7 +26,7 @@ powershell '$progressPreference = "silentlyContinue"; Expand-Archive vector-'"$A # C:\a\vector\vector\target\msi-x64\vector.wxs(6) : error CNDL0108 : The Product/@Version attribute's value, '0.29.0.custom.a28ecdc', is not a valid version. # Legal version values should look like 'x.x.x.x' where x is an integer from 0 to 65534. # , by changing "0.29.0.custom.a28ecdc" -> "0.29.0". -CHANNEL="${CHANNEL:-"$(cargo vdev release channel)"}" +CHANNEL="${CHANNEL:-"$($vdev_cmd release channel)"}" if [[ "$CHANNEL" == "custom" ]]; then PACKAGE_VERSION="${ARCHIVE_VERSION%.custom*}" diff --git a/scripts/package-rpm.sh b/scripts/package-rpm.sh index 45176770613b7..bedb4486731cd 100755 --- a/scripts/package-rpm.sh +++ b/scripts/package-rpm.sh @@ -17,7 +17,7 @@ TARGET="${TARGET:?"You must specify a target triple, ex: arm64-apple-darwin"}" # Local vars # -PACKAGE_VERSION="${VECTOR_VERSION:-"$(cargo vdev version)"}" +PACKAGE_VERSION="${VECTOR_VERSION:-"$(${VDEV:-cargo vdev} version)"}" ARCHIVE_NAME="vector-$PACKAGE_VERSION-$TARGET.tar.gz" ARCHIVE_PATH="target/artifacts/$ARCHIVE_NAME" @@ -63,11 +63,19 @@ cp -av distribution/systemd/. "$RPMBUILD_DIR/SOURCES/systemd" # Copy the archive into the sources dir cp -av "$ARCHIVE_PATH" "$RPMBUILD_DIR/SOURCES/vector-$ARCH.tar.gz" +# Determine the correct strip tool for cross-compilation. +case "$TARGET" in + aarch64-*) STRIP_TOOL="aarch64-linux-gnu-strip" ;; + armv7-*-gnueabihf) STRIP_TOOL="arm-linux-gnueabihf-strip" ;; + *) STRIP_TOOL="strip" ;; +esac + # Perform the build. 
rpmbuild \ --define "_topdir $RPMBUILD_DIR" \ --target "$ARCH-redhat-linux" \ --define "_arch $ARCH" \ + --define "__strip $STRIP_TOOL" \ --nodebuginfo \ -ba distribution/rpm/vector.spec diff --git a/scripts/release-s3.sh b/scripts/release-s3.sh index c082aa7a3f410..9889fa556e4eb 100755 --- a/scripts/release-s3.sh +++ b/scripts/release-s3.sh @@ -7,8 +7,10 @@ set -euo pipefail # # Uploads archives and packages to S3 -CHANNEL="${CHANNEL:-"$(cargo vdev release channel)"}" -VERSION="${VECTOR_VERSION:-"$(cargo vdev version)"}" +vdev_cmd="${VDEV:-cargo vdev}" + +CHANNEL="${CHANNEL:-"$($vdev_cmd release channel)"}" +VERSION="${VECTOR_VERSION:-"$($vdev_cmd version)"}" DATE="${DATE:-"$(date -u +%Y-%m-%d)"}" VERIFY_TIMEOUT="${VERIFY_TIMEOUT:-"30"}" # seconds VERIFY_RETRIES="${VERIFY_RETRIES:-"2"}" diff --git a/scripts/run-integration-test.sh b/scripts/run-integration-test.sh index 5a5782ec433c2..a1b22ce4d6c4c 100755 --- a/scripts/run-integration-test.sh +++ b/scripts/run-integration-test.sh @@ -10,6 +10,8 @@ if [[ "${ACTIONS_RUNNER_DEBUG:-}" == "true" ]]; then set -x fi +vdev_cmd="${VDEV:-cargo vdev}" + print_compose_logs_on_failure() { local LAST_RETURN_CODE=$1 if [[ "$LAST_RETURN_CODE" -ne 0 || "${ACTIONS_RUNNER_DEBUG:-}" == "true" ]]; then @@ -104,7 +106,7 @@ if [[ ${#TEST_ENV} -gt 0 ]]; then TEST_ENVIRONMENTS="${TEST_ENV}" else # Collect all available environments via auto-discovery - mapfile -t TEST_ENVIRONMENTS < <(cargo vdev "${VERBOSITY}" "${TEST_TYPE}" show -e "${TEST_NAME}") + mapfile -t TEST_ENVIRONMENTS < <($vdev_cmd "${VERBOSITY}" "${TEST_TYPE}" show -e "${TEST_NAME}") if [[ ${#TEST_ENVIRONMENTS[@]} -eq 0 ]]; then echo "ERROR: no environments found for ${TEST_TYPE} test '${TEST_NAME}'" >&2 exit 1 @@ -127,12 +129,12 @@ for TEST_ENV in "${TEST_ENVIRONMENTS[@]}"; do docker run --rm -v vector_target:/output/"${TEST_NAME}" alpine:3.20 \ sh -c "rm -rf /output/${TEST_NAME}/*" - cargo vdev "${VERBOSITY}" "${TEST_TYPE}" start "${TEST_NAME}" "${TEST_ENV}" + $vdev_cmd 
"${VERBOSITY}" "${TEST_TYPE}" start "${TEST_NAME}" "${TEST_ENV}" START_RET=$? print_compose_logs_on_failure "$START_RET" if [[ "$START_RET" -eq 0 ]]; then - cargo vdev "${VERBOSITY}" "${TEST_TYPE}" test --retries "$RETRIES" "${TEST_NAME}" "${TEST_ENV}" + $vdev_cmd "${VERBOSITY}" "${TEST_TYPE}" test --retries "$RETRIES" "${TEST_NAME}" "${TEST_ENV}" RET=$? print_compose_logs_on_failure "$RET" @@ -144,7 +146,7 @@ for TEST_ENV in "${TEST_ENVIRONMENTS[@]}"; do fi # Always stop the environment (best effort cleanup) - cargo vdev "${VERBOSITY}" "${TEST_TYPE}" stop "${TEST_NAME}" || true + $vdev_cmd "${VERBOSITY}" "${TEST_TYPE}" stop "${TEST_NAME}" || true # Exit early on first failure if [[ "$RET" -ne 0 ]]; then diff --git a/src/config/api.rs b/src/config/api.rs index fca088f751b39..5f99618c166f6 100644 --- a/src/config/api.rs +++ b/src/config/api.rs @@ -4,7 +4,10 @@ use url::Url; use vector_lib::configurable::configurable_component; /// API options. -#[configurable_component(api("api"))] +#[configurable_component] +#[configurable(metadata( + docs::warnings = "The API currently does not support authentication. Only enable it in isolated environments or for debugging. It must not be exposed to untrusted clients." +))] #[derive(Clone, Copy, Debug, Eq, PartialEq)] #[serde(default, deny_unknown_fields)] pub struct Options { diff --git a/src/config/builder.rs b/src/config/builder.rs index f732d37c89665..cce736f03b707 100644 --- a/src/config/builder.rs +++ b/src/config/builder.rs @@ -25,7 +25,6 @@ pub struct ConfigBuilder { pub api: api::Options, #[configurable(derived)] - #[configurable(metadata(docs::hidden))] #[serde(default)] pub schema: schema::Options, @@ -34,22 +33,27 @@ pub struct ConfigBuilder { pub healthchecks: HealthcheckOptions, /// All configured enrichment tables. + #[configurable(metadata(docs::additional_props_description = "An enrichment table."))] #[serde(default)] pub enrichment_tables: IndexMap>, /// All configured sources. 
+ #[configurable(metadata(docs::additional_props_description = "A source."))] #[serde(default)] pub sources: IndexMap, /// All configured sinks. + #[configurable(metadata(docs::additional_props_description = "A sink."))] #[serde(default)] pub sinks: IndexMap>, /// All configured transforms. + #[configurable(metadata(docs::additional_props_description = "A transform."))] #[serde(default)] pub transforms: IndexMap>, /// All configured unit tests. + #[configurable(metadata(docs::hidden))] #[serde(default)] pub tests: Vec>, @@ -57,9 +61,11 @@ pub struct ConfigBuilder { /// /// Configuration providers allow sourcing configuration information from a source other than /// the typical configuration files that must be passed to Vector. + #[configurable(metadata(docs::hidden))] pub provider: Option, /// All configured secrets backends. + #[configurable(metadata(docs::additional_props_description = "A secret backend."))] #[serde(default)] pub secret: IndexMap, diff --git a/src/config/loading/secret.rs b/src/config/loading/secret.rs index a7e27901b3811..8877c3d1f5494 100644 --- a/src/config/loading/secret.rs +++ b/src/config/loading/secret.rs @@ -25,10 +25,11 @@ use crate::{ // - "SECRET[backend.secret_name]" will match and capture "backend" and "secret_name" // - "SECRET[backend.secret.name]" will match and capture "backend" and "secret.name" // - "SECRET[backend..secret.name]" will match and capture "backend" and ".secret.name" +// - "SECRET[backend.path/to/secret]" will match and capture "backend" and "path/to/secret" // - "SECRET[secret_name]" will not match // - "SECRET[.secret.name]" will not match pub static COLLECTOR: LazyLock = - LazyLock::new(|| Regex::new(r"SECRET\[([[:word:]]+)\.([[:word:].-]+)\]").unwrap()); + LazyLock::new(|| Regex::new(r"SECRET\[([[:word:]]+)\.([[:word:].\-/]+)\]").unwrap()); /// Helper type for specifically deserializing secrets backends. 
#[derive(Debug, Default, Deserialize, Serialize)] @@ -182,6 +183,8 @@ mod tests { let secrets: HashMap = vec![ ("a.secret.key".into(), "value".into()), ("a...key".into(), "a...value".into()), + ("backend.path/to/secret".into(), "secret_value".into()), + ("backend.nested/dir/file".into(), "nested_value".into()), ] .into_iter() .collect(); @@ -203,6 +206,14 @@ mod tests { Ok("a...value".into()), interpolate("SECRET[a...key]", &secrets) ); + assert_eq!( + Ok("secret_value".into()), + interpolate("SECRET[backend.path/to/secret]", &secrets) + ); + assert_eq!( + Ok("nested_value".into()), + interpolate("SECRET[backend.nested/dir/file]", &secrets) + ); assert_eq!( Ok("xxxSECRET[non_matching_syntax]yyy".into()), interpolate("xxxSECRET[non_matching_syntax]yyy", &secrets) @@ -227,6 +238,8 @@ mod tests { SECRET[second_backend.secret.key] SECRET[first_backend.a_third.secret_key] SECRET[first_backend...an_extra_secret_key] + SECRET[first_backend.path/to/secret] + SECRET[second_backend.nested/dir/secret] SECRET[non_matching_syntax] SECRET[.non.matching.syntax] "}, @@ -237,17 +250,19 @@ mod tests { assert!(keys.contains_key("second_backend")); let first_backend_keys = keys.get("first_backend").unwrap(); - assert_eq!(first_backend_keys.len(), 5); + assert_eq!(first_backend_keys.len(), 6); assert!(first_backend_keys.contains("secret_key")); assert!(first_backend_keys.contains("secret-key")); assert!(first_backend_keys.contains("another_secret_key")); assert!(first_backend_keys.contains("a_third.secret_key")); assert!(first_backend_keys.contains("..an_extra_secret_key")); + assert!(first_backend_keys.contains("path/to/secret")); let second_backend_keys = keys.get("second_backend").unwrap(); - assert_eq!(second_backend_keys.len(), 2); + assert_eq!(second_backend_keys.len(), 3); assert!(second_backend_keys.contains("secret_key")); assert!(second_backend_keys.contains("secret.key")); + assert!(second_backend_keys.contains("nested/dir/secret")); } #[test] diff --git a/src/config/mod.rs 
b/src/config/mod.rs index fc36a9aa2364b..dceade72d6eeb 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -270,7 +270,7 @@ impl Config { } /// Healthcheck options. -#[configurable_component(global_option("healthchecks"))] +#[configurable_component] #[derive(Clone, Copy, Debug)] #[serde(default)] pub struct HealthcheckOptions { diff --git a/src/config/schema.rs b/src/config/schema.rs index 3a4cd987e7951..388765b29e1b9 100644 --- a/src/config/schema.rs +++ b/src/config/schema.rs @@ -3,19 +3,41 @@ use vector_lib::{config::LogNamespace, configurable::configurable_component}; pub(crate) use crate::schema::Definition; /// Schema options. +/// +/// **Note:** The `enabled` and `validation` options are experimental and should only be enabled if you +/// understand the limitations. While the infrastructure exists for schema tracking and validation, the +/// full vision of automatic semantic field mapping and comprehensive schema enforcement was never fully +/// realized. +/// +/// If you encounter issues with these features, please [report them here](https://github.com/vectordotdev/vector/issues/new?template=bug.yml). #[configurable_component] #[derive(Clone, Copy, Debug, Eq, PartialEq)] #[serde(default, deny_unknown_fields)] pub struct Options { - /// Whether or not schema is enabled. + /// When enabled, Vector tracks the schema (field types and structure) of events as they flow + /// from sources through transforms to sinks. This allows Vector to understand what data each + /// component receives and produces. #[serde(default = "default_enabled")] pub enabled: bool, - /// Whether or not schema validation is enabled. + /// When enabled, Vector validates that events flowing into each sink match the schema + /// requirements of that sink. If a sink requires certain fields or types that are missing + /// from the incoming events, Vector will report an error during configuration validation. + /// + /// This helps catch pipeline configuration errors early, before runtime. 
#[serde(default = "default_validation")] pub validation: bool, - /// Whether or not to enable log namespacing. + /// Controls how metadata is stored in log events. + /// + /// When set to `false` (legacy mode), metadata fields like `host`, `timestamp`, and `source_type` + /// are stored as top-level fields alongside your log data. + /// + /// When set to `true` (Vector namespace mode), metadata is stored in a separate metadata namespace, + /// keeping it distinct from your actual log data. + /// + /// See the [Log Namespacing guide](/guides/level-up/log_namespace/) for detailed information + /// about when to use Vector namespace mode and how to migrate from legacy mode. pub log_namespace: Option, } diff --git a/src/enrichment_tables/mod.rs b/src/enrichment_tables/mod.rs index f814e175f576e..fb693b180308b 100644 --- a/src/enrichment_tables/mod.rs +++ b/src/enrichment_tables/mod.rs @@ -35,7 +35,7 @@ pub mod mmdb; /// condition. We don't recommend using a condition that uses only date range searches. /// /// -#[configurable_component(global_option("enrichment_tables"))] +#[configurable_component] #[derive(Clone, Debug)] #[serde(tag = "type", rename_all = "snake_case")] #[enum_dispatch(EnrichmentTableConfig)] @@ -67,6 +67,21 @@ pub enum EnrichmentTables { Mmdb(mmdb::MmdbConfig), } +// Manual NamedComponent impl required because enum_dispatch doesn't support it yet. 
+impl vector_lib::configurable::NamedComponent for EnrichmentTables { + fn get_component_name(&self) -> &'static str { + match self { + Self::File(config) => config.get_component_name(), + #[cfg(feature = "enrichment-tables-memory")] + Self::Memory(config) => config.get_component_name(), + #[cfg(feature = "enrichment-tables-geoip")] + Self::Geoip(config) => config.get_component_name(), + #[cfg(feature = "enrichment-tables-mmdb")] + Self::Mmdb(config) => config.get_component_name(), + } + } +} + impl GenerateConfig for EnrichmentTables { fn generate_config() -> toml::Value { toml::Value::try_from(Self::File(file::FileConfig { diff --git a/src/internal_events/mod.rs b/src/internal_events/mod.rs index 30f282686357a..02b7460856f2d 100644 --- a/src/internal_events/mod.rs +++ b/src/internal_events/mod.rs @@ -51,6 +51,12 @@ mod encoding_transcode; mod eventstoredb_metrics; #[cfg(feature = "sources-exec")] mod exec; +#[cfg(any( + feature = "sources-file", + feature = "sources-kubernetes_logs", + feature = "sinks-file", +))] +mod file; #[cfg(any(feature = "sources-file_descriptor", feature = "sources-stdin"))] mod file_descriptor; #[cfg(feature = "transforms-filter")] @@ -142,13 +148,8 @@ mod websocket; mod websocket_server; #[cfg(feature = "transforms-window")] mod window; - -#[cfg(any( - feature = "sources-file", - feature = "sources-kubernetes_logs", - feature = "sinks-file", -))] -mod file; +#[cfg(all(windows, feature = "sources-windows_event_log"))] +mod windows_event_log; #[cfg(windows)] mod windows; @@ -293,6 +294,8 @@ pub(crate) use self::websocket_server::*; pub(crate) use self::window::*; #[cfg(windows)] pub(crate) use self::windows::*; +#[cfg(all(windows, feature = "sources-windows_event_log"))] +pub(crate) use self::windows_event_log::*; pub use self::{ adaptive_concurrency::*, batch::*, common::*, conditions::*, encoding_transcode::*, heartbeat::*, http::*, open::*, process::*, socket::*, tcp::*, template::*, udp::*, diff --git 
a/src/internal_events/windows_event_log.rs b/src/internal_events/windows_event_log.rs new file mode 100644 index 0000000000000..ae5363e2c52be --- /dev/null +++ b/src/internal_events/windows_event_log.rs @@ -0,0 +1,125 @@ +use metrics::counter; +use tracing::error; +use vector_lib::{ + NamedInternalEvent, + internal_event::{InternalEvent, error_stage, error_type}, +}; + +#[derive(Debug, NamedInternalEvent)] +pub struct WindowsEventLogParseError { + pub error: String, + pub channel: String, + pub event_id: Option, +} + +impl InternalEvent for WindowsEventLogParseError { + fn emit(self) { + error!( + message = "Failed to parse Windows Event Log event.", + error = %self.error, + channel = %self.channel, + event_id = ?self.event_id, + error_code = "parse_failed", + error_type = error_type::PARSER_FAILED, + stage = error_stage::PROCESSING, + internal_log_rate_limit = true, + ); + counter!( + "component_errors_total", + "error_code" => "parse_failed", + "error_type" => error_type::PARSER_FAILED, + "stage" => error_stage::PROCESSING, + ) + .increment(1); + } +} + +#[derive(Debug, NamedInternalEvent)] +pub struct WindowsEventLogQueryError { + pub channel: String, + pub query: Option, + pub error: String, +} + +impl InternalEvent for WindowsEventLogQueryError { + fn emit(self) { + error!( + message = "Failed to query Windows Event Log.", + channel = %self.channel, + query = ?self.query, + error = %self.error, + error_code = "query_failed", + error_type = error_type::REQUEST_FAILED, + stage = error_stage::RECEIVING, + internal_log_rate_limit = true, + ); + counter!( + "component_errors_total", + "error_code" => "query_failed", + "error_type" => error_type::REQUEST_FAILED, + "stage" => error_stage::RECEIVING, + ) + .increment(1); + } +} + +#[derive(Debug, NamedInternalEvent)] +pub struct WindowsEventLogBookmarkError { + pub channel: String, + pub error: String, +} + +impl InternalEvent for WindowsEventLogBookmarkError { + fn emit(self) { + error!( + message = "Failed to save 
bookmark for Windows Event Log channel.", + channel = %self.channel, + error = %self.error, + error_code = "bookmark_failed", + error_type = error_type::REQUEST_FAILED, + stage = error_stage::PROCESSING, + internal_log_rate_limit = true, + ); + counter!( + "component_errors_total", + "error_code" => "bookmark_failed", + "error_type" => error_type::REQUEST_FAILED, + "stage" => error_stage::PROCESSING, + ) + .increment(1); + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_parse_error() { + let event = WindowsEventLogParseError { + error: "Test error".to_string(), + channel: "System".to_string(), + event_id: Some(1000), + }; + event.emit(); + } + + #[test] + fn test_query_error() { + let event = WindowsEventLogQueryError { + channel: "System".to_string(), + query: Some("*[System]".to_string()), + error: "Operation timed out".to_string(), + }; + event.emit(); + } + + #[test] + fn test_bookmark_error() { + let event = WindowsEventLogBookmarkError { + channel: "System".to_string(), + error: "Failed to save bookmark".to_string(), + }; + event.emit(); + } +} diff --git a/src/providers/http.rs b/src/providers/http.rs index 975db2e17f319..5555a99e0e98d 100644 --- a/src/providers/http.rs +++ b/src/providers/http.rs @@ -21,6 +21,7 @@ use crate::{ #[derive(Clone, Debug)] pub struct RequestConfig { /// HTTP headers to add to the request. + #[configurable(metadata(docs::additional_props_description = "An HTTP header."))] #[serde(default)] pub headers: IndexMap, } diff --git a/src/secrets/mod.rs b/src/secrets/mod.rs index 162e589f23f13..19bc6471ee96e 100644 --- a/src/secrets/mod.rs +++ b/src/secrets/mod.rs @@ -51,7 +51,7 @@ mod test; /// Secrets are loaded when Vector starts or if Vector receives a `SIGHUP` signal triggering its /// configuration reload process. 
#[allow(clippy::large_enum_variant)] -#[configurable_component(global_option("secret"))] +#[configurable_component] #[derive(Clone, Debug)] #[enum_dispatch(SecretBackend)] #[serde(tag = "type", rename_all = "snake_case")] @@ -79,6 +79,20 @@ pub enum SecretBackends { Test(test::TestBackend), } +// Manual NamedComponent impl required because enum_dispatch doesn't support it yet. +impl vector_lib::configurable::NamedComponent for SecretBackends { + fn get_component_name(&self) -> &'static str { + match self { + Self::File(config) => config.get_component_name(), + Self::Directory(config) => config.get_component_name(), + Self::Exec(config) => config.get_component_name(), + #[cfg(feature = "secrets-aws-secrets-manager")] + Self::AwsSecretsManager(config) => config.get_component_name(), + Self::Test(config) => config.get_component_name(), + } + } +} + impl GenerateConfig for SecretBackends { fn generate_config() -> toml::Value { toml::Value::try_from(Self::File(file::FileBackend { diff --git a/src/sources/mod.rs b/src/sources/mod.rs index 77258bdb77a46..cdc03f46fdcbb 100644 --- a/src/sources/mod.rs +++ b/src/sources/mod.rs @@ -94,6 +94,8 @@ pub mod syslog; pub mod vector; #[cfg(feature = "sources-websocket")] pub mod websocket; +#[cfg(feature = "sources-windows_event_log")] +pub mod windows_event_log; pub mod util; diff --git a/src/sources/opentelemetry/config.rs b/src/sources/opentelemetry/config.rs index 96b290e7d9b09..c7eed8f301b50 100644 --- a/src/sources/opentelemetry/config.rs +++ b/src/sources/opentelemetry/config.rs @@ -47,6 +47,65 @@ pub const LOGS: &str = "logs"; pub const METRICS: &str = "metrics"; pub const TRACES: &str = "traces"; +/// Configuration for OTLP decoding behavior. +#[configurable_component] +#[derive(Clone, Debug, Default, PartialEq, Eq)] +#[serde(deny_unknown_fields)] +pub struct OtlpDecodingConfig { + /// Whether to use OTLP decoding for logs. + /// + /// When `true`, logs preserve their OTLP format. 
+ /// When `false` (default), logs are converted to Vector's native format. + #[serde(default)] + pub logs: bool, + + /// Whether to use OTLP decoding for metrics. + /// + /// When `true`, metrics preserve their OTLP format but are processed as logs. + /// When `false` (default), metrics are converted to Vector's native metric format. + #[serde(default)] + pub metrics: bool, + + /// Whether to use OTLP decoding for traces. + /// + /// When `true`, traces preserve their OTLP format. + /// When `false` (default), traces are converted to Vector's native format. + #[serde(default)] + pub traces: bool, +} + +impl From for OtlpDecodingConfig { + /// Converts a boolean value to an OtlpDecodingConfig. + /// + /// This provides backward compatibility with the previous boolean configuration. + /// - `true` enables OTLP decoding for all signals + /// - `false` disables OTLP decoding for all signals (uses Vector native format) + fn from(value: bool) -> Self { + Self { + logs: value, + metrics: value, + traces: value, + } + } +} + +impl OtlpDecodingConfig { + /// Returns true if any signal is configured to use OTLP decoding. + pub const fn any_enabled(&self) -> bool { + self.logs || self.metrics || self.traces + } + + /// Returns true if all signals are configured to use OTLP decoding. + pub const fn all_enabled(&self) -> bool { + self.logs && self.metrics && self.traces + } + + /// Returns true if signals have mixed configuration (some enabled, some disabled). + pub const fn is_mixed(&self) -> bool { + self.any_enabled() && !self.all_enabled() + } +} + /// Configuration for the `opentelemetry` source. #[configurable_component(source("opentelemetry", "Receive OTLP data through gRPC or HTTP."))] #[derive(Clone, Debug)] @@ -67,14 +126,36 @@ pub struct OpentelemetryConfig { #[serde(default)] pub log_namespace: Option, - /// Setting this field will override the legacy mapping of OTEL protos to Vector events and use the proto directly. + /// Configuration for OTLP decoding behavior. 
/// - /// One major caveat here is that the incoming metrics will be parsed as logs but they will preserve the OTLP format. - /// This means that components that work on metrics, will not be compatible with this output. - /// However, these events can be forwarded directly to a downstream OTEL collector. - #[configurable(derived)] - #[serde(default)] - pub use_otlp_decoding: bool, + /// This configuration controls how OpenTelemetry Protocol (OTLP) data is decoded for each + /// signal type (logs, metrics, traces). When a signal is configured to use OTLP decoding, the raw OTLP format is + /// preserved, allowing the data to be forwarded to downstream OTLP collectors without transformation. + /// Otherwise, the signal is converted to Vector's native event format. + /// + /// Simple boolean form: + /// + /// ```yaml + /// use_otlp_decoding: true # All signals preserve OTLP format + /// # or + /// use_otlp_decoding: false # All signals use Vector native format (default) + /// ``` + /// + /// Per-signal configuration: + /// + /// ```yaml + /// use_otlp_decoding: + /// logs: false # Convert to Vector native format + /// metrics: false # Convert to Vector native format + /// traces: true # Preserve OTLP format + /// ``` + /// + /// **Note:** When OTLP decoding is enabled for metrics: + /// - Metrics are parsed as logs while preserving the OTLP format + /// - Vector's metric transforms will NOT be compatible with this output + /// - The events can be forwarded directly (passthrough) to a downstream OTLP collector + #[serde(default, deserialize_with = "bool_or_struct")] + pub use_otlp_decoding: OtlpDecodingConfig, } /// Configuration for the `opentelemetry` gRPC server. 
@@ -152,18 +233,24 @@ impl GenerateConfig for OpentelemetryConfig { http: example_http_config(), acknowledgements: Default::default(), log_namespace: None, - use_otlp_decoding: false, + use_otlp_decoding: OtlpDecodingConfig::default(), }) .unwrap() } } impl OpentelemetryConfig { - fn get_signal_deserializer( + pub(crate) fn get_signal_deserializer( &self, signal_type: OtlpSignalType, ) -> vector_common::Result> { - if self.use_otlp_decoding { + let should_use_otlp = match signal_type { + OtlpSignalType::Logs => self.use_otlp_decoding.logs, + OtlpSignalType::Metrics => self.use_otlp_decoding.metrics, + OtlpSignalType::Traces => self.use_otlp_decoding.traces, + }; + + if should_use_otlp { Ok(Some(OtlpDeserializer::new_with_signals(IndexSet::from([ signal_type, ])))) @@ -183,6 +270,16 @@ impl SourceConfig for OpentelemetryConfig { let grpc_tls_settings = MaybeTlsSettings::from_config(self.grpc.tls.as_ref(), true)?; + // Log info message when using mixed OTLP decoding formats + if self.use_otlp_decoding.is_mixed() { + info!( + message = "Signals with OTLP decoding enabled will preserve raw format; others will use Vector native format.", + logs_otlp = self.use_otlp_decoding.logs, + metrics_otlp = self.use_otlp_decoding.metrics, + traces_otlp = self.use_otlp_decoding.traces, + ); + } + let logs_deserializer = self.get_signal_deserializer(OtlpSignalType::Logs)?; let metrics_deserializer = self.get_signal_deserializer(OtlpSignalType::Metrics)?; let traces_deserializer = self.get_signal_deserializer(OtlpSignalType::Traces)?; @@ -352,13 +449,20 @@ impl SourceConfig for OpentelemetryConfig { } }; - let metrics_output = if self.use_otlp_decoding { + let logs_output = if self.use_otlp_decoding.logs { + SourceOutput::new_maybe_logs(DataType::Log, Definition::any()).with_port(LOGS) + } else { + SourceOutput::new_maybe_logs(DataType::Log, schema_definition).with_port(LOGS) + }; + + let metrics_output = if self.use_otlp_decoding.metrics { SourceOutput::new_maybe_logs(DataType::Log, 
Definition::any()).with_port(METRICS) } else { SourceOutput::new_metrics().with_port(METRICS) }; + vec![ - SourceOutput::new_maybe_logs(DataType::Log, schema_definition).with_port(LOGS), + logs_output, metrics_output, SourceOutput::new_traces().with_port(TRACES), ] diff --git a/src/sources/opentelemetry/integration_tests.rs b/src/sources/opentelemetry/integration_tests.rs index c712b08d7741f..00d642f41e47e 100644 --- a/src/sources/opentelemetry/integration_tests.rs +++ b/src/sources/opentelemetry/integration_tests.rs @@ -62,7 +62,7 @@ async fn receive_logs_legacy_namespace() { }, acknowledgements: Default::default(), log_namespace: Default::default(), - use_otlp_decoding: false, + use_otlp_decoding: false.into(), }; let (sender, logs_output, _) = new_source(EventStatus::Delivered, LOGS.to_string()); @@ -161,7 +161,7 @@ async fn receive_trace() { }, acknowledgements: Default::default(), log_namespace: Default::default(), - use_otlp_decoding: false, + use_otlp_decoding: false.into(), }; let (sender, trace_output, _) = new_source(EventStatus::Delivered, TRACES.to_string()); @@ -266,7 +266,7 @@ async fn receive_metric() { }, acknowledgements: Default::default(), log_namespace: Default::default(), - use_otlp_decoding: false, + use_otlp_decoding: false.into(), }; let (sender, metrics_output, _) = new_source(EventStatus::Delivered, METRICS.to_string()); diff --git a/src/sources/opentelemetry/tests.rs b/src/sources/opentelemetry/tests.rs index 466196ded223d..de5781756dba1 100644 --- a/src/sources/opentelemetry/tests.rs +++ b/src/sources/opentelemetry/tests.rs @@ -1087,7 +1087,7 @@ fn get_source_config_with_headers( }, acknowledgements: Default::default(), log_namespace: Default::default(), - use_otlp_decoding, + use_otlp_decoding: use_otlp_decoding.into(), } } @@ -1263,7 +1263,7 @@ pub async fn build_otlp_test_env( }, acknowledgements: Default::default(), log_namespace, - use_otlp_decoding: false, + use_otlp_decoding: false.into(), }; let (sender, output, _) = 
new_source(EventStatus::Delivered, event_name.to_string()); @@ -1342,7 +1342,7 @@ async fn http_logs_use_otlp_decoding_emits_metric() { }, acknowledgements: Default::default(), log_namespace: None, - use_otlp_decoding: true, + use_otlp_decoding: true.into(), }; let (sender, logs_output, _) = new_source(EventStatus::Delivered, LOGS.to_string()); @@ -1409,3 +1409,279 @@ async fn http_logs_use_otlp_decoding_emits_metric() { _ => panic!("component_received_events_total should be a counter"), } } + +#[cfg(test)] +mod otlp_decoding_config_tests { + use crate::config::{DataType, LogNamespace, SourceConfig}; + use crate::sources::opentelemetry::config::{ + GrpcConfig, HttpConfig, OpentelemetryConfig, OtlpDecodingConfig, + }; + use vector_lib::codecs::decoding::OtlpSignalType; + + #[test] + fn test_otlp_decoding_mixed_configurations() { + // Test single signal enabled + let config = OtlpDecodingConfig { + logs: false, + metrics: false, + traces: true, + }; + assert!(config.any_enabled()); + assert!(!config.all_enabled()); + assert!(config.is_mixed()); + + // Test two signals enabled + let config = OtlpDecodingConfig { + logs: true, + metrics: false, + traces: true, + }; + assert!(config.any_enabled()); + assert!(!config.all_enabled()); + assert!(config.is_mixed()); + + // Test different single signal + let config = OtlpDecodingConfig { + logs: true, + metrics: false, + traces: false, + }; + assert!(config.any_enabled()); + assert!(!config.all_enabled()); + assert!(config.is_mixed()); + } + + #[test] + fn test_otlp_decoding_from_bool() { + // Test direct From trait implementation + let config_true = OtlpDecodingConfig::from(true); + assert!(config_true.logs); + assert!(config_true.metrics); + assert!(config_true.traces); + assert!(config_true.all_enabled()); + assert!(!config_true.is_mixed()); + + let config_false = OtlpDecodingConfig::from(false); + assert!(!config_false.logs); + assert!(!config_false.metrics); + assert!(!config_false.traces); + 
assert!(!config_false.any_enabled()); + assert!(!config_false.is_mixed()); + + // Test TOML deserialization (which uses From under the hood) + let config: OpentelemetryConfig = toml::from_str( + r#" + use_otlp_decoding = true + + [grpc] + address = "0.0.0.0:4317" + + [http] + address = "0.0.0.0:4318" + "#, + ) + .unwrap(); + assert!(config.use_otlp_decoding.logs); + assert!(config.use_otlp_decoding.metrics); + assert!(config.use_otlp_decoding.traces); + + let config: OpentelemetryConfig = toml::from_str( + r#" + use_otlp_decoding = false + + [grpc] + address = "0.0.0.0:4317" + + [http] + address = "0.0.0.0:4318" + "#, + ) + .unwrap(); + assert!(!config.use_otlp_decoding.logs); + assert!(!config.use_otlp_decoding.metrics); + assert!(!config.use_otlp_decoding.traces); + } + + #[test] + fn test_otlp_decoding_deserialization_from_struct() { + // Test deserializing from a struct with all fields + let config: OpentelemetryConfig = toml::from_str( + r#" + [grpc] + address = "0.0.0.0:4317" + + [http] + address = "0.0.0.0:4318" + + [use_otlp_decoding] + logs = false + metrics = false + traces = true + "#, + ) + .unwrap(); + assert!(!config.use_otlp_decoding.logs); + assert!(!config.use_otlp_decoding.metrics); + assert!(config.use_otlp_decoding.traces); + + // Test deserializing from a struct with partial fields (using defaults) + let config: OpentelemetryConfig = toml::from_str( + r#" + [grpc] + address = "0.0.0.0:4317" + + [http] + address = "0.0.0.0:4318" + + [use_otlp_decoding] + traces = true + "#, + ) + .unwrap(); + assert!(!config.use_otlp_decoding.logs); // default false + assert!(!config.use_otlp_decoding.metrics); // default false + assert!(config.use_otlp_decoding.traces); + } + + #[test] + fn test_otlp_decoding_default_when_not_specified() { + // Test that when use_otlp_decoding is not specified, it uses defaults (all false) + let config: OpentelemetryConfig = toml::from_str( + r#" + [grpc] + address = "0.0.0.0:4317" + + [http] + address = "0.0.0.0:4318" + "#, + 
) + .unwrap(); + assert!(!config.use_otlp_decoding.logs); + assert!(!config.use_otlp_decoding.metrics); + assert!(!config.use_otlp_decoding.traces); + } + + #[tokio::test] + async fn test_get_signal_deserializer_per_signal() { + let config_all_true = OpentelemetryConfig { + grpc: GrpcConfig { + address: "0.0.0.0:4317".parse().unwrap(), + tls: None, + }, + http: HttpConfig { + address: "0.0.0.0:4318".parse().unwrap(), + tls: None, + keepalive: Default::default(), + headers: vec![], + }, + acknowledgements: Default::default(), + log_namespace: None, + use_otlp_decoding: OtlpDecodingConfig { + logs: true, + metrics: true, + traces: true, + }, + }; + + // All should return Some deserializer + assert!( + config_all_true + .get_signal_deserializer(OtlpSignalType::Logs) + .unwrap() + .is_some() + ); + assert!( + config_all_true + .get_signal_deserializer(OtlpSignalType::Metrics) + .unwrap() + .is_some() + ); + assert!( + config_all_true + .get_signal_deserializer(OtlpSignalType::Traces) + .unwrap() + .is_some() + ); + + let config_mixed = OpentelemetryConfig { + grpc: GrpcConfig { + address: "0.0.0.0:4317".parse().unwrap(), + tls: None, + }, + http: HttpConfig { + address: "0.0.0.0:4318".parse().unwrap(), + tls: None, + keepalive: Default::default(), + headers: vec![], + }, + acknowledgements: Default::default(), + log_namespace: None, + use_otlp_decoding: OtlpDecodingConfig { + logs: false, + metrics: false, + traces: true, + }, + }; + + // Only traces should return Some deserializer + assert!( + config_mixed + .get_signal_deserializer(OtlpSignalType::Logs) + .unwrap() + .is_none() + ); + assert!( + config_mixed + .get_signal_deserializer(OtlpSignalType::Metrics) + .unwrap() + .is_none() + ); + assert!( + config_mixed + .get_signal_deserializer(OtlpSignalType::Traces) + .unwrap() + .is_some() + ); + } + + #[test] + fn test_outputs_configuration_per_signal() { + let config_mixed = OpentelemetryConfig { + grpc: GrpcConfig { + address: "0.0.0.0:4317".parse().unwrap(), + 
tls: None, + }, + http: HttpConfig { + address: "0.0.0.0:4318".parse().unwrap(), + tls: None, + keepalive: Default::default(), + headers: vec![], + }, + acknowledgements: Default::default(), + log_namespace: None, + use_otlp_decoding: OtlpDecodingConfig { + logs: false, + metrics: true, + traces: true, + }, + }; + + let outputs = config_mixed.outputs(LogNamespace::Legacy); + assert_eq!(outputs.len(), 3); + + // Verify logs output (native format) + let logs_output = &outputs[0]; + assert_eq!(logs_output.port.as_deref(), Some("logs")); + assert_eq!(logs_output.ty, DataType::Log); + + // Verify metrics output (OTLP format, logs data type) + let metrics_output = &outputs[1]; + assert_eq!(metrics_output.port.as_deref(), Some("metrics")); + assert_eq!(metrics_output.ty, DataType::Log); // Should be Log when OTLP decoding is enabled + + // Verify traces output (OTLP format, traces data type) + let traces_output = &outputs[2]; + assert_eq!(traces_output.port.as_deref(), Some("traces")); + assert_eq!(traces_output.ty, DataType::Trace); // Should always be Trace regardless of OTLP decoding + } +} diff --git a/src/sources/syslog.rs b/src/sources/syslog.rs index 9a3d78b8e1bad..ca4756aeff584 100644 --- a/src/sources/syslog.rs +++ b/src/sources/syslog.rs @@ -1208,7 +1208,7 @@ mod test { // Create and spawn the source. let config = SyslogConfig::from_mode(Mode::Udp { address: in_addr.into(), - receive_buffer_bytes: None, + receive_buffer_bytes: Some(4 * 1024 * 1024), }); let key = ComponentKey::from("in"); diff --git a/src/sources/windows_event_log/bookmark.rs b/src/sources/windows_event_log/bookmark.rs new file mode 100644 index 0000000000000..02032c3844f91 --- /dev/null +++ b/src/sources/windows_event_log/bookmark.rs @@ -0,0 +1,297 @@ +//! Windows Event Log Bookmark Management +//! +//! Provides bookmark-based checkpointing for Windows Event Log subscriptions. +//! Bookmarks survive channel clears and log rotations, and provide O(1) seeking. 
+
+use tracing::{debug, error};
+use windows::{
+    Win32::System::EventLog::{
+        EVT_HANDLE, EvtClose, EvtCreateBookmark, EvtRender, EvtRenderBookmark, EvtUpdateBookmark,
+    },
+    core::HSTRING,
+};
+
+use super::error::WindowsEventLogError;
+
+/// Maximum size for rendered bookmark XML (1 MB should be more than enough)
+const MAX_BOOKMARK_XML_SIZE: usize = 1024 * 1024;
+
+/// Manages a Windows Event Log bookmark for checkpoint tracking
+///
+/// Bookmarks provide robust, Windows-managed position tracking in event logs.
+/// They are opaque handles that can be serialized to XML for persistence.
+#[derive(Debug)]
+pub struct BookmarkManager {
+    handle: EVT_HANDLE,
+}
+
+impl BookmarkManager {
+    /// Creates a new bookmark (not associated with any event yet)
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the Windows API fails to create the bookmark.
+    pub fn new() -> Result<Self, WindowsEventLogError> {
+        unsafe {
+            let handle = EvtCreateBookmark(None).map_err(|e| {
+                error!(message = "Failed to create bookmark.", error = %e);
+                WindowsEventLogError::CreateSubscriptionError { source: e }
+            })?;
+
+            debug!(message = "Created new bookmark.", handle = ?handle);
+
+            Ok(Self { handle })
+        }
+    }
+
+    /// Creates a bookmark from serialized XML
+    ///
+    /// This is used when resuming from a checkpoint.
+    ///
+    /// # Arguments
+    ///
+    /// * `xml` - The XML string representation of a bookmark
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the XML is invalid or the Windows API fails.
+    pub fn from_xml(xml: &str) -> Result<Self, WindowsEventLogError> {
+        if xml.is_empty() {
+            return Self::new(); // Empty XML = fresh bookmark
+        }
+
+        unsafe {
+            let xml_hstring = HSTRING::from(xml);
+            match EvtCreateBookmark(&xml_hstring) {
+                Ok(handle) => {
+                    debug!(message = "Created bookmark from XML.", handle = ?handle);
+                    Ok(Self { handle })
+                }
+                Err(e) => {
+                    // Propagate the error so the caller can decide how to handle it
+                    // (e.g., fall back to a fresh bookmark with has_valid_checkpoint = false)
+                    Err(WindowsEventLogError::CreateSubscriptionError { source: e })
+                }
+            }
+        }
+    }
+
+    /// Updates the bookmark to point to the given event
+    ///
+    /// Call this after successfully processing an event to update the checkpoint position.
+    ///
+    /// # Arguments
+    ///
+    /// * `event_handle` - Handle to the event to bookmark
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the Windows API fails to update the bookmark.
+    pub fn update(&mut self, event_handle: EVT_HANDLE) -> Result<(), WindowsEventLogError> {
+        unsafe {
+            EvtUpdateBookmark(self.handle, event_handle).map_err(|e| {
+                error!(message = "Failed to update bookmark.", error = %e);
+                WindowsEventLogError::SubscriptionError { source: e }
+            })?;
+
+            debug!(message = "Updated bookmark.", event_handle = ?event_handle);
+            Ok(())
+        }
+    }
+
+    /// Serializes the bookmark to XML for persistence
+    ///
+    /// The returned XML string can be saved to a checkpoint file and later
+    /// restored using `from_xml()`.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the Windows API fails to render the bookmark.
+    ///
+    /// Note: For lock-free serialization, prefer `serialize_handle()` which
+    /// allows copying the handle out of a lock before serializing.
+    #[cfg(test)]
+    pub fn to_xml(&self) -> Result<String, WindowsEventLogError> {
+        unsafe {
+            // EvtRender params: Context, Fragment, Flags, BufferSize, Buffer, BufferUsed, PropertyCount
+            // BufferUsed (6th param) receives the required size in bytes
+            // PropertyCount (7th param) receives the number of properties
+            let mut required_size: u32 = 0;
+            let mut property_count: u32 = 0;
+
+            // First call with null buffer to get required size
+            // ERROR_INSUFFICIENT_BUFFER (122 / 0x7A) is expected
+            let _ = EvtRender(
+                None,
+                self.handle,
+                EvtRenderBookmark.0,
+                0,
+                None,
+                &mut required_size,
+                &mut property_count,
+            );
+
+            if required_size == 0 {
+                // Bookmark hasn't been updated with any events yet - return empty string
+                // This is normal for fresh bookmarks before first event
+                debug!(message = "Bookmark not yet updated, skipping serialization.");
+                return Ok(String::new());
+            }
+
+            if required_size > MAX_BOOKMARK_XML_SIZE as u32 {
+                return Err(WindowsEventLogError::RenderError {
+                    message: format!("Bookmark buffer size too large: {}", required_size),
+                });
+            }
+
+            // Allocate buffer and render bookmark XML
+            let mut buffer = vec![0u16; (required_size / 2) as usize];
+            let mut actual_used: u32 = 0;
+
+            EvtRender(
+                None,
+                self.handle,
+                EvtRenderBookmark.0,
+                required_size,
+                Some(buffer.as_mut_ptr() as *mut _),
+                &mut actual_used,
+                &mut property_count,
+            )
+            .map_err(|e| WindowsEventLogError::RenderError {
+                message: format!("Failed to render bookmark XML: {}", e),
+            })?;
+
+            // Convert UTF-16 buffer to String
+            let xml = String::from_utf16_lossy(&buffer[0..((actual_used / 2) as usize)]);
+
+            debug!(
+                message = "Serialized bookmark to XML.",
+                xml_length = xml.len()
+            );
+
+            Ok(xml.trim_end_matches('\0').to_string())
+        }
+    }
+
+    /// Returns the raw Windows handle for use with EvtSubscribe
+    ///
+    /// # Safety
+    ///
+    /// The returned handle is only valid as long as this BookmarkManager exists.
+    pub const fn as_handle(&self) -> EVT_HANDLE {
+        self.handle
+    }
+
+    /// Serialize an EVT_HANDLE directly to XML without needing a BookmarkManager reference
+    ///
+    /// This is useful for serializing bookmarks outside of a lock - you can copy the handle
+    /// (just an integer) while holding the lock, then call this method after releasing it.
+    ///
+    /// # Safety
+    ///
+    /// The handle must be a valid bookmark handle that hasn't been closed.
+    /// Windows EVT_HANDLEs are thread-safe kernel objects, so concurrent
+    /// EvtUpdateBookmark and EvtRender calls on the same handle are safe.
+    pub fn serialize_handle(handle: EVT_HANDLE) -> Result<String, WindowsEventLogError> {
+        unsafe {
+            // First call to get required buffer size
+            // EvtRender params: Context, Fragment, Flags, BufferSize, Buffer, BufferUsed, PropertyCount
+            // BufferUsed (param 6) receives the required size when buffer is too small
+            // PropertyCount (param 7) receives number of properties
+            let mut buffer_used: u32 = 0;
+            let mut property_count: u32 = 0;
+
+            // First call with null buffer to get required size (ERROR_INSUFFICIENT_BUFFER expected)
+            let _ = EvtRender(
+                None,
+                handle,
+                EvtRenderBookmark.0,
+                0,
+                None,
+                &mut buffer_used,
+                &mut property_count,
+            );
+
+            // buffer_used now contains the required size in bytes
+            if buffer_used == 0 {
+                // Bookmark hasn't been updated with any events yet
+                return Ok(String::new());
+            }
+
+            if buffer_used > MAX_BOOKMARK_XML_SIZE as u32 {
+                return Err(WindowsEventLogError::RenderError {
+                    message: format!("Bookmark buffer size too large: {}", buffer_used),
+                });
+            }
+
+            // Allocate buffer (buffer_used is in bytes, UTF-16 chars are 2 bytes each)
+            let mut buffer = vec![0u16; (buffer_used / 2) as usize + 1];
+
+            let mut actual_used: u32 = 0;
+            EvtRender(
+                None,
+                handle,
+                EvtRenderBookmark.0,
+                buffer_used,
+                Some(buffer.as_mut_ptr() as *mut _),
+                &mut actual_used,
+                &mut property_count,
+            )
+            .map_err(|e| WindowsEventLogError::RenderError {
+                message: format!("Failed to render bookmark XML: {}", e),
+            })?;
+
+            let xml = String::from_utf16_lossy(&buffer[0..((actual_used / 2) as usize)]);
+            Ok(xml.trim_end_matches('\0').to_string())
+        }
+    }
+
+    /// Closes the bookmark handle
+    ///
+    /// This is called automatically when the BookmarkManager is dropped.
+    fn close(&mut self) {
+        if self.handle.0 != 0 {
+            unsafe {
+                let _ = EvtClose(self.handle);
+                debug!(message = "Closed bookmark handle.", handle = ?self.handle);
+                self.handle = EVT_HANDLE(0);
+            }
+        }
+    }
+}
+
+impl Drop for BookmarkManager {
+    fn drop(&mut self) {
+        self.close();
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_bookmark_lifecycle() {
+        // Test creating a new bookmark
+        let bookmark = BookmarkManager::new();
+        assert!(bookmark.is_ok());
+
+        // Test serialization (should work even without updating)
+        let xml = bookmark.unwrap().to_xml();
+        assert!(xml.is_ok());
+    }
+
+    #[test]
+    fn test_bookmark_from_empty_xml() {
+        // Empty XML should create a fresh bookmark
+        let bookmark = BookmarkManager::from_xml("");
+        assert!(bookmark.is_ok());
+    }
+
+    #[test]
+    fn test_bookmark_handle() {
+        let bookmark = BookmarkManager::new().unwrap();
+        let handle = bookmark.as_handle();
+        assert!(!handle.is_invalid(), "Bookmark handle should be valid");
+    }
+}
diff --git a/src/sources/windows_event_log/checkpoint.rs b/src/sources/windows_event_log/checkpoint.rs
new file mode 100644
index 0000000000000..46c3ce0ade349
--- /dev/null
+++ b/src/sources/windows_event_log/checkpoint.rs
@@ -0,0 +1,569 @@
+use std::{
+    collections::HashMap,
+    io::{self, ErrorKind},
+    path::{Path, PathBuf},
+};
+
+use serde::{Deserialize, Serialize};
+use tokio::{
+    fs::{self, OpenOptions},
+    io::AsyncWriteExt,
+    sync::Mutex,
+};
+use tracing::{debug, error, info, warn};
+use windows::Win32::Storage::FileSystem::ReplaceFileW;
+use windows::core::HSTRING;
+
+use super::error::WindowsEventLogError;
+
+const CHECKPOINT_FILENAME: &str = "windows_event_log_checkpoints.json";
+
+/// Checkpoint data for a single Windows Event Log channel
+///
+/// Uses Windows Event Log bookmarks for robust position tracking that survives
+/// channel clears, log rotations, and provides O(1) seeking.
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
+pub struct ChannelCheckpoint {
+    /// The channel name (e.g., "System", "Application", "Security")
+    pub channel: String,
+    /// Windows Event Log bookmark XML for position tracking
+    pub bookmark_xml: String,
+    /// Timestamp when this checkpoint was last updated (for debugging)
+    #[serde(default)]
+    pub updated_at: String,
+}
+
+/// Container for all channel checkpoints
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
+struct CheckpointState {
+    /// Version for future compatibility
+    version: u32,
+    /// Map of channel name to checkpoint
+    channels: HashMap<String, ChannelCheckpoint>,
+}
+
+impl Default for CheckpointState {
+    fn default() -> Self {
+        Self {
+            version: 1, // Version 1: bookmark-based checkpointing
+            channels: HashMap::new(),
+        }
+    }
+}
+
+/// Manages checkpoint persistence for Windows Event Log subscriptions
+///
+/// Uses Windows Event Log bookmarks (opaque XML handles) to track position in
+/// each channel. Bookmarks are more robust than record IDs as they survive
+/// channel clears, log rotations, and provide O(1) seeking on restart.
+pub struct Checkpointer {
+    checkpoint_path: PathBuf,
+    state: Mutex<CheckpointState>,
+}
+
+impl Checkpointer {
+    /// Create a new checkpointer for the given data directory
+    pub async fn new(data_dir: &Path) -> Result<Self, WindowsEventLogError> {
+        let checkpoint_path = data_dir.join(CHECKPOINT_FILENAME);
+
+        // Ensure the data directory exists
+        if let Err(e) = fs::create_dir_all(data_dir).await
+            && e.kind() != ErrorKind::AlreadyExists
+        {
+            return Err(WindowsEventLogError::IoError { source: e });
+        }
+
+        // Load existing checkpoint state or create new
+        let state = Self::load_from_disk(&checkpoint_path).await?;
+
+        info!(
+            message = "Windows Event Log checkpointer initialized.",
+            checkpoint_path = %checkpoint_path.display(),
+            channels = state.channels.len()
+        );
+
+        Ok(Self {
+            checkpoint_path,
+            state: Mutex::new(state),
+        })
+    }
+
+    /// Get the last checkpoint for a specific channel
+    pub async fn get(&self, channel: &str) -> Option<ChannelCheckpoint> {
+        let state = self.state.lock().await;
+        state.channels.get(channel).cloned()
+    }
+
+    /// Update the checkpoint for a specific channel using bookmark XML
+    ///
+    /// Bookmarks provide robust position tracking that survives channel clears,
+    /// log rotations, and provides O(1) seeking on restart.
+    ///
+    /// Note: For better performance with multiple channels, prefer `set_batch()`
+    /// which writes all checkpoints in a single disk operation.
+    #[cfg(test)]
+    pub async fn set(
+        &self,
+        channel: String,
+        bookmark_xml: String,
+    ) -> Result<(), WindowsEventLogError> {
+        let mut state = self.state.lock().await;
+
+        let checkpoint = ChannelCheckpoint {
+            channel: channel.clone(),
+            bookmark_xml,
+            updated_at: chrono::Utc::now().to_rfc3339(),
+        };
+
+        state.channels.insert(channel.clone(), checkpoint);
+
+        // Persist to disk immediately for reliability
+        self.save_to_disk(&state).await?;
+
+        debug!(
+            message = "Updated checkpoint for channel.",
+            channel = %channel
+        );
+
+        Ok(())
+    }
+
+    /// Update multiple channel checkpoints in a single atomic disk write
+    ///
+    /// This is much more efficient than calling `set()` multiple times because:
+    /// - Single file write instead of N writes
+    /// - Single fsync instead of N fsyncs
+    /// - Atomic - either all channels update or none do
+    ///
+    /// Batching checkpoint updates is standard practice for event log collectors
+    /// and avoids per-event disk I/O overhead.
+    pub async fn set_batch(
+        &self,
+        updates: Vec<(String, String)>,
+    ) -> Result<(), WindowsEventLogError> {
+        if updates.is_empty() {
+            return Ok(());
+        }
+
+        let mut state = self.state.lock().await;
+        let timestamp = chrono::Utc::now().to_rfc3339();
+
+        for (channel, bookmark_xml) in &updates {
+            let checkpoint = ChannelCheckpoint {
+                channel: channel.clone(),
+                bookmark_xml: bookmark_xml.clone(),
+                updated_at: timestamp.clone(),
+            };
+            state.channels.insert(channel.clone(), checkpoint);
+        }
+
+        // Single disk write for all channels
+        self.save_to_disk(&state).await?;
+
+        debug!(
+            message = "Batch updated checkpoints.",
+            channels_updated = updates.len()
+        );
+
+        Ok(())
+    }
+
+    /// Load checkpoint state from disk
+    async fn load_from_disk(path: &Path) -> Result<CheckpointState, WindowsEventLogError> {
+        match fs::read(path).await {
+            Ok(contents) => match serde_json::from_slice::<CheckpointState>(&contents) {
+                Ok(state) => {
+                    info!(
+                        message = "Loaded existing checkpoints.",
+                        channels = state.channels.len(),
+                        path = %path.display()
+                    );
Ok(state) + } + Err(e) => { + warn!( + message = "Failed to parse checkpoint file, starting fresh.", + error = %e, + path = %path.display() + ); + Ok(CheckpointState::default()) + } + }, + Err(e) if e.kind() == ErrorKind::NotFound => { + debug!( + message = "No existing checkpoint file, starting fresh.", + path = %path.display() + ); + Ok(CheckpointState::default()) + } + Err(e) => { + error!( + message = "Failed to read checkpoint file.", + error = %e, + path = %path.display() + ); + Err(WindowsEventLogError::IoError { source: e }) + } + } + } + + /// Save checkpoint state to disk atomically + async fn save_to_disk(&self, state: &CheckpointState) -> Result<(), WindowsEventLogError> { + // Use atomic write: write to temp file, then rename + let temp_path = self.checkpoint_path.with_extension("tmp"); + + // Serialize state + let contents = match serde_json::to_vec_pretty(state) { + Ok(c) => c, + Err(e) => { + error!( + message = "Failed to serialize checkpoint state.", + error = %e + ); + return Err(WindowsEventLogError::IoError { + source: io::Error::new(ErrorKind::InvalidData, e), + }); + } + }; + + // Write to temp file + let mut file = OpenOptions::new() + .write(true) + .create(true) + .truncate(true) + .open(&temp_path) + .await + .map_err(|e| WindowsEventLogError::IoError { source: e })?; + + file.write_all(&contents) + .await + .map_err(|e| WindowsEventLogError::IoError { source: e })?; + + file.sync_all() + .await + .map_err(|e| WindowsEventLogError::IoError { source: e })?; + + drop(file); + + // Use ReplaceFileW for atomic replacement on Windows; fall back to + // rename when the destination doesn't exist yet (first run). 
+        #[cfg(windows)]
+        {
+            let dst = HSTRING::from(self.checkpoint_path.to_string_lossy().as_ref());
+            let src = HSTRING::from(temp_path.to_string_lossy().as_ref());
+            let replaced = unsafe {
+                ReplaceFileW(
+                    &dst,
+                    &src,
+                    None,
+                    windows::Win32::Storage::FileSystem::REPLACE_FILE_FLAGS(0),
+                    None,
+                    None,
+                )
+            };
+            if replaced.is_err() {
+                // Destination may not exist yet — fall back to rename
+                fs::rename(&temp_path, &self.checkpoint_path)
+                    .await
+                    .map_err(|e| WindowsEventLogError::IoError { source: e })?;
+            }
+        }
+        #[cfg(not(windows))]
+        {
+            fs::rename(&temp_path, &self.checkpoint_path)
+                .await
+                .map_err(|e| WindowsEventLogError::IoError { source: e })?;
+        }
+
+        Ok(())
+    }
+
+    /// Remove checkpoint for a channel (useful for testing or reset)
+    #[cfg(test)]
+    pub async fn remove(&self, channel: &str) -> Result<(), WindowsEventLogError> {
+        let mut state = self.state.lock().await;
+        state.channels.remove(channel);
+        self.save_to_disk(&state).await?;
+
+        info!(
+            message = "Removed checkpoint for channel.",
+            channel = %channel
+        );
+
+        Ok(())
+    }
+
+    /// Get all channel checkpoints (useful for debugging)
+    #[cfg(test)]
+    pub async fn list(&self) -> Vec<ChannelCheckpoint> {
+        let state = self.state.lock().await;
+        state.channels.values().cloned().collect()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use tempfile::TempDir;
+
+    /// Helper to create test bookmark XML
+    fn test_bookmark_xml(channel: &str, record_id: u64) -> String {
+        format!(
+            r#"<BookmarkList><Bookmark Channel='{}' RecordId='{}' IsCurrent='true'/></BookmarkList>"#,
+            channel, record_id
+        )
+    }
+
+    async fn create_test_checkpointer() -> (Checkpointer, TempDir) {
+        let temp_dir = TempDir::new().unwrap();
+        let checkpointer = Checkpointer::new(temp_dir.path()).await.unwrap();
+        (checkpointer, temp_dir)
+    }
+
+    #[tokio::test]
+    async fn test_checkpoint_basic_operations() {
+        let (checkpointer, _temp_dir) = create_test_checkpointer().await;
+
+        // Initially empty
+        assert!(checkpointer.get("System").await.is_none());
+
+        // Set checkpoint
+        let bookmark = test_bookmark_xml("System", 12345);
+ checkpointer + .set("System".to_string(), bookmark.clone()) + .await + .unwrap(); + + // Retrieve checkpoint + let checkpoint = checkpointer.get("System").await.unwrap(); + assert_eq!(checkpoint.channel, "System"); + assert_eq!(checkpoint.bookmark_xml, bookmark); + } + + #[tokio::test] + async fn test_checkpoint_persistence() { + let temp_dir = TempDir::new().unwrap(); + + let system_bookmark = test_bookmark_xml("System", 100); + let app_bookmark = test_bookmark_xml("Application", 200); + + // Create first checkpointer and set values + { + let checkpointer = Checkpointer::new(temp_dir.path()).await.unwrap(); + checkpointer + .set("System".to_string(), system_bookmark.clone()) + .await + .unwrap(); + checkpointer + .set("Application".to_string(), app_bookmark.clone()) + .await + .unwrap(); + } + + // Create new checkpointer (simulating restart) and verify persistence + { + let checkpointer = Checkpointer::new(temp_dir.path()).await.unwrap(); + let system_checkpoint = checkpointer.get("System").await.unwrap(); + assert_eq!(system_checkpoint.bookmark_xml, system_bookmark); + + let app_checkpoint = checkpointer.get("Application").await.unwrap(); + assert_eq!(app_checkpoint.bookmark_xml, app_bookmark); + } + } + + #[tokio::test] + async fn test_checkpoint_update() { + let (checkpointer, _temp_dir) = create_test_checkpointer().await; + + // Set initial value + let bookmark1 = test_bookmark_xml("System", 100); + checkpointer + .set("System".to_string(), bookmark1) + .await + .unwrap(); + + // Update value + let bookmark2 = test_bookmark_xml("System", 200); + checkpointer + .set("System".to_string(), bookmark2.clone()) + .await + .unwrap(); + + // Verify updated value + let checkpoint = checkpointer.get("System").await.unwrap(); + assert_eq!(checkpoint.bookmark_xml, bookmark2); + } + + #[tokio::test] + async fn test_checkpoint_multiple_channels() { + let (checkpointer, _temp_dir) = create_test_checkpointer().await; + + let system_bookmark = test_bookmark_xml("System", 
100); + let app_bookmark = test_bookmark_xml("Application", 200); + let security_bookmark = test_bookmark_xml("Security", 300); + + checkpointer + .set("System".to_string(), system_bookmark.clone()) + .await + .unwrap(); + checkpointer + .set("Application".to_string(), app_bookmark.clone()) + .await + .unwrap(); + checkpointer + .set("Security".to_string(), security_bookmark.clone()) + .await + .unwrap(); + + assert_eq!( + checkpointer.get("System").await.unwrap().bookmark_xml, + system_bookmark + ); + assert_eq!( + checkpointer.get("Application").await.unwrap().bookmark_xml, + app_bookmark + ); + assert_eq!( + checkpointer.get("Security").await.unwrap().bookmark_xml, + security_bookmark + ); + } + + #[tokio::test] + async fn test_checkpoint_remove() { + let (checkpointer, _temp_dir) = create_test_checkpointer().await; + + let bookmark = test_bookmark_xml("System", 100); + checkpointer + .set("System".to_string(), bookmark) + .await + .unwrap(); + assert!(checkpointer.get("System").await.is_some()); + + checkpointer.remove("System").await.unwrap(); + assert!(checkpointer.get("System").await.is_none()); + } + + #[tokio::test] + async fn test_checkpoint_list() { + let (checkpointer, _temp_dir) = create_test_checkpointer().await; + + let system_bookmark = test_bookmark_xml("System", 100); + let app_bookmark = test_bookmark_xml("Application", 200); + + checkpointer + .set("System".to_string(), system_bookmark) + .await + .unwrap(); + checkpointer + .set("Application".to_string(), app_bookmark) + .await + .unwrap(); + + let checkpoints = checkpointer.list().await; + assert_eq!(checkpoints.len(), 2); + } + + #[tokio::test] + async fn test_corrupted_checkpoint_file() { + let temp_dir = TempDir::new().unwrap(); + let checkpoint_path = temp_dir.path().join(CHECKPOINT_FILENAME); + + // Write corrupted data + fs::write(&checkpoint_path, b"invalid json {{{") + .await + .unwrap(); + + // Should handle gracefully and start fresh + let checkpointer = 
Checkpointer::new(temp_dir.path()).await.unwrap(); + assert!(checkpointer.get("System").await.is_none()); + + // Should be able to write new checkpoints + let bookmark = test_bookmark_xml("System", 100); + checkpointer + .set("System".to_string(), bookmark.clone()) + .await + .unwrap(); + assert_eq!( + checkpointer.get("System").await.unwrap().bookmark_xml, + bookmark + ); + } + + #[tokio::test] + async fn test_checkpoint_batch_update() { + let (checkpointer, _temp_dir) = create_test_checkpointer().await; + + let system_bookmark = test_bookmark_xml("System", 100); + let app_bookmark = test_bookmark_xml("Application", 200); + let security_bookmark = test_bookmark_xml("Security", 300); + + // Batch update all channels at once + checkpointer + .set_batch(vec![ + ("System".to_string(), system_bookmark.clone()), + ("Application".to_string(), app_bookmark.clone()), + ("Security".to_string(), security_bookmark.clone()), + ]) + .await + .unwrap(); + + // Verify all channels were updated + assert_eq!( + checkpointer.get("System").await.unwrap().bookmark_xml, + system_bookmark + ); + assert_eq!( + checkpointer.get("Application").await.unwrap().bookmark_xml, + app_bookmark + ); + assert_eq!( + checkpointer.get("Security").await.unwrap().bookmark_xml, + security_bookmark + ); + } + + #[tokio::test] + async fn test_checkpoint_batch_empty() { + let (checkpointer, _temp_dir) = create_test_checkpointer().await; + + // Empty batch should succeed without writing + checkpointer.set_batch(vec![]).await.unwrap(); + + // No checkpoints should exist + assert!(checkpointer.list().await.is_empty()); + } + + #[tokio::test] + async fn test_checkpoint_batch_persistence() { + let temp_dir = TempDir::new().unwrap(); + + let system_bookmark = test_bookmark_xml("System", 100); + let app_bookmark = test_bookmark_xml("Application", 200); + + // Create first checkpointer and batch update + { + let checkpointer = Checkpointer::new(temp_dir.path()).await.unwrap(); + checkpointer + .set_batch(vec![ + 
("System".to_string(), system_bookmark.clone()), + ("Application".to_string(), app_bookmark.clone()), + ]) + .await + .unwrap(); + } + + // Create new checkpointer (simulating restart) and verify persistence + { + let checkpointer = Checkpointer::new(temp_dir.path()).await.unwrap(); + assert_eq!( + checkpointer.get("System").await.unwrap().bookmark_xml, + system_bookmark + ); + assert_eq!( + checkpointer.get("Application").await.unwrap().bookmark_xml, + app_bookmark + ); + } + } +} diff --git a/src/sources/windows_event_log/config.rs b/src/sources/windows_event_log/config.rs new file mode 100644 index 0000000000000..95a8a2110615e --- /dev/null +++ b/src/sources/windows_event_log/config.rs @@ -0,0 +1,759 @@ +use std::{collections::HashMap, path::PathBuf}; + +use vector_config::component::GenerateConfig; +use vector_lib::configurable::configurable_component; + +use crate::{config::SourceAcknowledgementsConfig, serde::bool_or_struct}; + +// Validation constants +const MAX_CHANNEL_NAME_LENGTH: usize = 256; +const MAX_XPATH_QUERY_LENGTH: usize = 4096; +const MAX_FIELD_NAME_LENGTH: usize = 128; +const MAX_FIELD_COUNT: usize = 100; +const MAX_EVENT_ID_LIST_SIZE: usize = 1000; +const MAX_CHANNELS: usize = 63; // MAXIMUM_WAIT_OBJECTS (64) minus 1 for shutdown event +const MAX_CONNECTION_TIMEOUT_SECS: u64 = 3600; +const MAX_EVENT_TIMEOUT_MS: u64 = 60000; +const MAX_BATCH_SIZE: u32 = 10000; + +/// Configuration for the `windows_event_log` source. +#[configurable_component(source( + "windows_event_log", + "Collect logs from Windows Event Log channels using the Windows Event Log API." +))] +#[derive(Clone, Debug)] +#[serde(deny_unknown_fields)] +pub struct WindowsEventLogConfig { + /// A comma-separated list of channels to read from. + /// + /// Common channels include "System", "Application", "Security", "Windows PowerShell". + /// Use Windows Event Viewer to discover available channels. 
+ #[configurable(metadata(docs::examples = "System,Application,Security"))] + #[configurable(metadata(docs::examples = "System"))] + pub channels: Vec<String>, + + /// The XPath query for filtering events. + /// + /// Allows filtering events using XML Path Language queries. + /// If not specified, all events from the specified channels will be collected. + #[configurable(metadata(docs::examples = "*[System[Level=1 or Level=2 or Level=3]]"))] + #[configurable(metadata( + docs::examples = "*[System[(Level=1 or Level=2 or Level=3) and TimeCreated[timediff(@SystemTime) <= 86400000]]]" + ))] + pub event_query: Option<String>, + + /// Connection timeout in seconds for event subscription. + /// + /// This controls how long to wait for event subscription connection. + #[serde(default = "default_connection_timeout_secs")] + #[configurable(metadata(docs::examples = 30))] + #[configurable(metadata(docs::examples = 60))] + pub connection_timeout_secs: u64, + + /// Whether to read existing events or only new events. + /// + /// When set to `true`, the source will read all existing events from the channels. + /// When set to `false` (default), only new events will be read. + #[serde(default = "default_read_existing_events")] + pub read_existing_events: bool, + + /// Batch size for event processing. + /// + /// This controls how many events are processed in a single batch. + #[serde(default = "default_batch_size")] + #[configurable(metadata(docs::examples = 10))] + #[configurable(metadata(docs::examples = 100))] + pub batch_size: u32, + + /// Whether to include raw XML data in the output. + /// + /// When enabled, the raw XML representation of the event is included + /// in the `xml` field of the output event. + #[serde(default = "default_include_xml")] + pub include_xml: bool, + + /// Custom event data formatting options. + /// + /// Maps event field names to custom formatting options. 
+ #[serde(default)] + #[configurable(metadata( + docs::additional_props_description = "An individual event data format override." + ))] + pub event_data_format: HashMap<String, EventDataFormat>, + + /// Ignore specific event IDs. + /// + /// Events with these IDs will be filtered out and not sent downstream. + #[serde(default)] + #[configurable(metadata(docs::examples = 4624))] + #[configurable(metadata(docs::examples = 4625))] + #[configurable(metadata(docs::examples = 4634))] + pub ignore_event_ids: Vec<u32>, + + /// Only include specific event IDs. + /// + /// If specified, only events with these IDs will be processed. + /// Takes precedence over `ignore_event_ids`. + #[configurable(metadata(docs::examples = 1000))] + #[configurable(metadata(docs::examples = 1001))] + #[configurable(metadata(docs::examples = 1002))] + pub only_event_ids: Option<Vec<u32>>, + + /// Maximum age of events to process (in seconds). + /// + /// Events older than this value will be ignored. If not specified, + /// all events will be processed regardless of age. + #[configurable(metadata(docs::examples = 86400))] + #[configurable(metadata(docs::examples = 604800))] + pub max_event_age_secs: Option<u64>, + + /// Timeout in milliseconds for waiting for new events. + /// + /// Controls the maximum time `WaitForMultipleObjects` blocks before + /// returning to check for shutdown signals. Lower values increase + /// shutdown responsiveness at the cost of more frequent wake-ups. + #[serde(default = "default_event_timeout_ms")] + #[configurable(metadata(docs::examples = 5000))] + #[configurable(metadata(docs::examples = 10000))] + pub event_timeout_ms: u64, + + /// The namespace to use for logs. This overrides the global setting. + #[configurable(metadata(docs::hidden))] + #[serde(default)] + pub log_namespace: Option<bool>, + + /// Event field inclusion/exclusion patterns. + /// + /// Controls which event fields are included in the output. 
+ #[serde(default)] + pub field_filter: FieldFilter, + + /// The directory where checkpoint data is stored. + /// + /// By default, the [global `data_dir` option][global_data_dir] is used. + /// Make sure the running user has write permissions to this directory. + /// + /// [global_data_dir]: https://vector.dev/docs/reference/configuration/global-options/#data_dir + #[serde(default)] + #[configurable(metadata(docs::examples = "/var/lib/vector"))] + #[configurable(metadata(docs::examples = "C:\\ProgramData\\vector"))] + #[configurable(metadata(docs::human_name = "Data Directory"))] + pub data_dir: Option<PathBuf>, + + /// Maximum number of events to process per second. + /// + /// When set to a non-zero value, Vector will rate-limit event processing + /// to prevent overwhelming downstream systems. A value of 0 (default) means + /// no rate limiting is applied. + #[serde(default = "default_events_per_second")] + #[configurable(metadata(docs::examples = 100))] + #[configurable(metadata(docs::examples = 1000))] + #[configurable(metadata(docs::examples = 5000))] + pub events_per_second: u32, + + /// Maximum length for event data field values. + /// + /// Event data values longer than this will be truncated with "...\[truncated\]" appended. + /// Set to 0 for no limit. + #[serde(default = "default_max_event_data_length")] + #[configurable(metadata(docs::examples = 1024))] + #[configurable(metadata(docs::examples = 4096))] + pub max_event_data_length: usize, + + /// Interval in seconds between periodic checkpoint flushes. + /// + /// Controls how often bookmarks are persisted to disk in synchronous mode. + /// Lower values reduce the window of events that may be re-processed after + /// a crash, at the cost of more frequent disk writes. 
+ #[serde(default = "default_checkpoint_interval_secs")] + #[configurable(metadata(docs::examples = 5))] + #[configurable(metadata(docs::examples = 1))] + #[configurable(metadata(docs::examples = 30))] + pub checkpoint_interval_secs: u64, + + /// Whether to render human-readable event messages. + /// + /// When enabled (default), Vector will use the Windows EvtFormatMessage API + /// to render localized, human-readable event messages with parameter + /// substitution. This matches the behavior of Windows Event Viewer. + /// + /// Provider DLL handles are cached per provider, so the performance cost + /// is limited to the first event from each provider. Disable only if you + /// do not need rendered messages and want to eliminate the DLL loads entirely. + #[serde(default = "default_render_message")] + pub render_message: bool, + + /// Controls how acknowledgements are handled for this source. + /// + /// When enabled, the source will wait for downstream sinks to acknowledge + /// receipt of events before updating checkpoints. This provides exactly-once + /// delivery guarantees at the cost of potential duplicate events on restart + /// if acknowledgements are pending. + /// + /// When disabled (default), checkpoints are updated immediately after reading + /// events, which may result in data loss if Vector crashes before events are + /// delivered to sinks. + #[configurable(derived)] + #[serde(default, deserialize_with = "bool_or_struct")] + pub acknowledgements: SourceAcknowledgementsConfig, +} + +/// Event data formatting options for custom field type conversion. +/// +/// These options control how specific event fields are formatted in the output. +/// Use `event_data_format` config to map field names to their desired format. +#[configurable_component] +#[derive(Clone, Debug)] +#[serde(rename_all = "snake_case")] +pub enum EventDataFormat { + /// Format the field value as a string. + String, + /// Parse and format the field value as an integer. 
+ Integer, + /// Parse and format the field value as a floating-point number. + Float, + /// Parse and format the field value as a boolean. + /// Recognizes "true", "1", "yes", "on" as true (case-insensitive). + Boolean, + /// Keep the original format unchanged (passthrough). + /// The field value will not be converted or modified. + Auto, +} + +/// Field filtering configuration. +#[configurable_component] +#[derive(Clone, Debug)] +pub struct FieldFilter { + /// Fields to include in the output. + /// + /// If specified, only these fields will be included. + pub include_fields: Option<Vec<String>>, + + /// Fields to exclude from the output. + /// + /// These fields will be removed from the event data. + pub exclude_fields: Option<Vec<String>>, + + /// Whether to include system fields. + /// + /// System fields include metadata like Computer, TimeCreated, etc. + #[serde(default = "default_include_system_fields")] + pub include_system_fields: bool, + + /// Whether to include event data fields. + /// + /// Event data fields contain application-specific data. + #[serde(default = "default_include_event_data")] + pub include_event_data: bool, + + /// Whether to include user data fields. + /// + /// User data fields contain additional custom data. 
+ #[serde(default = "default_include_user_data")] + pub include_user_data: bool, +} + +impl Default for FieldFilter { + fn default() -> Self { + Self { + include_fields: None, + exclude_fields: None, + include_system_fields: default_include_system_fields(), + include_event_data: default_include_event_data(), + include_user_data: default_include_user_data(), + } + } +} + +impl Default for WindowsEventLogConfig { + fn default() -> Self { + Self { + channels: vec!["System".to_string(), "Application".to_string()], + event_query: None, + connection_timeout_secs: default_connection_timeout_secs(), + read_existing_events: default_read_existing_events(), + batch_size: default_batch_size(), + include_xml: default_include_xml(), + event_data_format: HashMap::new(), + ignore_event_ids: Vec::new(), + only_event_ids: None, + max_event_age_secs: None, + event_timeout_ms: default_event_timeout_ms(), + log_namespace: None, + field_filter: FieldFilter::default(), + data_dir: None, + events_per_second: default_events_per_second(), + max_event_data_length: default_max_event_data_length(), + checkpoint_interval_secs: default_checkpoint_interval_secs(), + render_message: default_render_message(), + acknowledgements: Default::default(), + } + } +} + +impl GenerateConfig for WindowsEventLogConfig { + fn generate_config() -> toml::Value { + toml::Value::try_from(WindowsEventLogConfig::default()).unwrap() + } +} + +impl WindowsEventLogConfig { + /// Validate the configuration. + pub fn validate(&self) -> Result<(), crate::Error> { + if self.channels.is_empty() { + return Err("At least one channel must be specified".into()); + } + + // WaitForMultipleObjects supports up to MAXIMUM_WAIT_OBJECTS (64) handles. + // One handle is reserved for the shutdown event, leaving 63 for channels. 
+ if self.channels.len() > MAX_CHANNELS { + return Err(format!( + "Too many channels: {} specified, maximum is {} \ + (limited by WaitForMultipleObjects)", + self.channels.len(), + MAX_CHANNELS + ) + .into()); + } + + // Enhanced security validation for connection timeout to prevent DoS + if self.connection_timeout_secs == 0 + || self.connection_timeout_secs > MAX_CONNECTION_TIMEOUT_SECS + { + return Err(format!( + "Connection timeout must be between 1 and {} seconds", + MAX_CONNECTION_TIMEOUT_SECS + ) + .into()); + } + + // Validate event timeout + if self.event_timeout_ms == 0 || self.event_timeout_ms > MAX_EVENT_TIMEOUT_MS { + return Err(format!( + "Event timeout must be between 1 and {} milliseconds", + MAX_EVENT_TIMEOUT_MS + ) + .into()); + } + + // Validate checkpoint interval + if self.checkpoint_interval_secs == 0 || self.checkpoint_interval_secs > 3600 { + return Err("Checkpoint interval must be between 1 and 3600 seconds".into()); + } + + // Prevent resource exhaustion via excessive batch sizes + if self.batch_size == 0 || self.batch_size > MAX_BATCH_SIZE { + return Err(format!("Batch size must be between 1 and {}", MAX_BATCH_SIZE).into()); + } + + // Enhanced channel name validation with security checks + for channel in &self.channels { + if channel.trim().is_empty() { + return Err("Channel names cannot be empty".into()); + } + + // Prevent excessively long channel names + if channel.len() > MAX_CHANNEL_NAME_LENGTH { + return Err(format!( + "Channel name '{}' exceeds maximum length of {} characters", + channel, MAX_CHANNEL_NAME_LENGTH + ) + .into()); + } + + // Reject wildcard patterns - they cause heap corruption issues with many channels + if is_channel_pattern(channel) { + return Err(format!( + "Channel name '{}' contains wildcard characters (*, ?, [). \ + Wildcard patterns are not supported. Please specify exact channel names.", + channel + ) + .into()); + } + + // Reject control characters and null bytes. 
Actual channel name + // validation is handled by EvtOpenChannelConfig at subscription time, + // so we only block characters that could cause issues before that check. + if channel.chars().any(|c| c.is_control()) { + return Err( + format!("Channel name '{}' contains control characters", channel).into(), + ); + } + } + + // Enhanced XPath query validation with injection protection + if let Some(ref query) = self.event_query { + if query.trim().is_empty() { + return Err("Event query cannot be empty".into()); + } + + // Prevent excessively long XPath queries + if query.len() > MAX_XPATH_QUERY_LENGTH { + return Err(format!( + "Event query exceeds maximum length of {} characters", + MAX_XPATH_QUERY_LENGTH + ) + .into()); + } + + // Check for unbalanced brackets and parentheses + let mut bracket_count = 0i32; + let mut paren_count = 0i32; + + for ch in query.chars() { + match ch { + '[' => bracket_count += 1, + ']' => bracket_count -= 1, + '(' => paren_count += 1, + ')' => paren_count -= 1, + _ => {} + } + + // Check for negative counts (more closing than opening) + if bracket_count < 0 || paren_count < 0 { + return Err("Event query contains unbalanced brackets or parentheses".into()); + } + } + + // Check for unmatched opening brackets/parentheses + if bracket_count != 0 || paren_count != 0 { + return Err("Event query contains unbalanced brackets or parentheses".into()); + } + + // Check for potentially dangerous patterns that could indicate XPath injection. + // Note: We exclude "http:" and "https:" as they are legitimate in XML namespace URIs. 
+ let dangerous_patterns = [ + "javascript:", + "vbscript:", + "file://", // Changed from "file:" to be more specific + "ftp:", + " MAX_EVENT_ID_LIST_SIZE { + return Err(format!( + "Only event IDs list cannot contain more than {} entries", + MAX_EVENT_ID_LIST_SIZE + ) + .into()); + } + } + + if self.ignore_event_ids.len() > MAX_EVENT_ID_LIST_SIZE { + return Err(format!( + "Ignore event IDs list cannot contain more than {} entries", + MAX_EVENT_ID_LIST_SIZE + ) + .into()); + } + + // Validate field filter settings + if let Some(ref include_fields) = self.field_filter.include_fields { + if include_fields.is_empty() { + return Err("Include fields list cannot be empty when specified".into()); + } + + if include_fields.len() > MAX_FIELD_COUNT { + return Err(format!( + "Include fields list cannot contain more than {} entries", + MAX_FIELD_COUNT + ) + .into()); + } + + for field in include_fields { + if field.trim().is_empty() || field.len() > MAX_FIELD_NAME_LENGTH { + return Err(format!("Invalid field name: '{}'", field).into()); + } + + // Enhanced security validation for field names + if field.contains('\0') + || field.contains('\r') + || field.contains('\n') + || field.contains('<') + || field.contains('>') + { + return Err(format!( + "Invalid field name contains dangerous characters: '{}'", + field + ) + .into()); + } + } + } + + if let Some(ref exclude_fields) = self.field_filter.exclude_fields { + if exclude_fields.is_empty() { + return Err("Exclude fields list cannot be empty when specified".into()); + } + + if exclude_fields.len() > MAX_FIELD_COUNT { + return Err(format!( + "Exclude fields list cannot contain more than {} entries", + MAX_FIELD_COUNT + ) + .into()); + } + + for field in exclude_fields { + if field.trim().is_empty() || field.len() > MAX_FIELD_NAME_LENGTH { + return Err(format!("Invalid field name: '{}'", field).into()); + } + + // Enhanced security validation for field names + if field.contains('\0') + || field.contains('\r') + || 
field.contains('\n') + || field.contains('<') + || field.contains('>') + { + return Err(format!( + "Invalid field name contains dangerous characters: '{}'", + field + ) + .into()); + } + } + } + + Ok(()) + } +} + +/// Check if a channel name contains glob pattern characters +pub fn is_channel_pattern(name: &str) -> bool { + name.contains('*') || name.contains('?') || name.contains('[') +} + +// Default value functions +const fn default_connection_timeout_secs() -> u64 { + 30 +} + +const fn default_event_timeout_ms() -> u64 { + 5000 +} + +const fn default_read_existing_events() -> bool { + false +} + +const fn default_batch_size() -> u32 { + 100 +} + +const fn default_include_xml() -> bool { + false +} + +const fn default_include_system_fields() -> bool { + true +} + +const fn default_include_event_data() -> bool { + true +} + +const fn default_include_user_data() -> bool { + true +} + +const fn default_events_per_second() -> u32 { + 0 // 0 means no rate limiting +} + +const fn default_max_event_data_length() -> usize { + 0 // 0 means no truncation +} + +const fn default_checkpoint_interval_secs() -> u64 { + 5 +} + +const fn default_render_message() -> bool { + true +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_default_config() { + let config = WindowsEventLogConfig::default(); + assert_eq!(config.channels, vec!["System", "Application"]); + assert_eq!(config.connection_timeout_secs, 30); + assert_eq!(config.event_timeout_ms, 5000); + assert!(!config.read_existing_events); + assert_eq!(config.batch_size, 100); + assert!(!config.include_xml); + assert!(config.render_message); + } + + #[test] + fn test_config_validation() { + let mut config = WindowsEventLogConfig::default(); + + // Valid configuration should pass + assert!(config.validate().is_ok()); + + // Empty channels should fail + config.channels = vec![]; + assert!(config.validate().is_err()); + + // Reset channels + config.channels = vec!["System".to_string()]; + 
assert!(config.validate().is_ok()); + + // Zero connection timeout should fail + config.connection_timeout_secs = 0; + assert!(config.validate().is_err()); + + // Reset connection timeout + config.connection_timeout_secs = 30; + assert!(config.validate().is_ok()); + + // Zero batch size should fail + config.batch_size = 0; + assert!(config.validate().is_err()); + + // Reset batch size + config.batch_size = 10; + assert!(config.validate().is_ok()); + + // Empty channel name should fail + config.channels = vec!["".to_string()]; + assert!(config.validate().is_err()); + + // Empty query should fail + config.channels = vec!["System".to_string()]; + config.event_query = Some("".to_string()); + assert!(config.validate().is_err()); + } + + #[test] + fn test_field_filter_default() { + let filter = FieldFilter::default(); + assert!(filter.include_system_fields); + assert!(filter.include_event_data); + assert!(filter.include_user_data); + assert!(filter.include_fields.is_none()); + assert!(filter.exclude_fields.is_none()); + } + + #[test] + fn test_serialization() { + let config = WindowsEventLogConfig { + channels: vec!["System".to_string(), "Application".to_string()], + event_query: Some("*[System[Level=1]]".to_string()), + connection_timeout_secs: 30, + read_existing_events: true, + batch_size: 50, + include_xml: true, + event_data_format: HashMap::new(), + ignore_event_ids: vec![4624, 4625], + only_event_ids: Some(vec![1000, 1001]), + max_event_age_secs: Some(86400), + event_timeout_ms: 5000, + log_namespace: Some(true), + field_filter: FieldFilter::default(), + data_dir: Some(PathBuf::from("/test/data")), + events_per_second: 1000, + max_event_data_length: 0, + checkpoint_interval_secs: 5, + render_message: true, + acknowledgements: SourceAcknowledgementsConfig::from(true), + }; + + // Should serialize and deserialize without errors + let serialized = serde_json::to_string(&config).expect("serialization should succeed"); + let deserialized: WindowsEventLogConfig = + 
serde_json::from_str(&serialized).expect("deserialization should succeed"); + + assert_eq!(config.channels, deserialized.channels); + assert_eq!(config.event_query, deserialized.event_query); + assert_eq!( + config.connection_timeout_secs, + deserialized.connection_timeout_secs + ); + assert_eq!( + config.read_existing_events, + deserialized.read_existing_events + ); + assert_eq!(config.batch_size, deserialized.batch_size); + assert_eq!(config.render_message, deserialized.render_message); + } + + #[test] + fn test_is_channel_pattern() { + // Exact channel names are not patterns + assert!(!is_channel_pattern("System")); + assert!(!is_channel_pattern("Application")); + assert!(!is_channel_pattern("Microsoft-Windows-Sysmon/Operational")); + + // Wildcard patterns + assert!(is_channel_pattern("Microsoft-Windows-*")); + assert!(is_channel_pattern("*")); + assert!(is_channel_pattern("Microsoft-Windows-Sysmon/*")); + + // Single character wildcard + assert!(is_channel_pattern("System?")); + assert!(is_channel_pattern("Microsoft-Windows-???")); + + // Character class patterns + assert!(is_channel_pattern("Microsoft-Windows-[A-Z]*")); + assert!(is_channel_pattern("[Ss]ystem")); + } + + #[test] + fn test_config_validation_rejects_wildcards() { + let mut config = WindowsEventLogConfig { + channels: vec!["Microsoft-Windows-*".to_string()], + ..Default::default() + }; + let err = config.validate().unwrap_err(); + assert!(err.to_string().contains("wildcard")); + + // Single character wildcards should be rejected + config.channels = vec!["System?".to_string()]; + let err = config.validate().unwrap_err(); + assert!(err.to_string().contains("wildcard")); + + // Character classes should be rejected + config.channels = vec!["[Ss]ystem".to_string()]; + let err = config.validate().unwrap_err(); + assert!(err.to_string().contains("wildcard")); + + // Mixed valid and wildcard should be rejected + config.channels = vec!["System".to_string(), "Microsoft-Windows-*".to_string()]; + let err = 
config.validate().unwrap_err(); + assert!(err.to_string().contains("wildcard")); + + // Exact channel names should still work + config.channels = vec![ + "System".to_string(), + "Application".to_string(), + "Microsoft-Windows-Sysmon/Operational".to_string(), + ]; + assert!(config.validate().is_ok()); + } +} diff --git a/src/sources/windows_event_log/error.rs b/src/sources/windows_event_log/error.rs new file mode 100644 index 0000000000000..1c2f6bfb83db4 --- /dev/null +++ b/src/sources/windows_event_log/error.rs @@ -0,0 +1,249 @@ +use snafu::Snafu; + +/// Errors that can occur when working with Windows Event Logs. +#[derive(Debug, Snafu)] +pub enum WindowsEventLogError { + #[snafu(display("Failed to open event log channel '{}': {}", channel, source))] + OpenChannelError { + channel: String, + source: windows::core::Error, + }, + + #[snafu(display("Failed to create event subscription: {}", source))] + CreateSubscriptionError { source: windows::core::Error }, + + #[snafu(display("Failed to query events: {}", source))] + QueryEventsError { source: windows::core::Error }, + + #[snafu(display("Failed to read event: {}", source))] + ReadEventError { source: windows::core::Error }, + + #[snafu(display("Failed to render event message: {}", source))] + RenderMessageError { source: windows::core::Error }, + + #[snafu(display("Failed to parse event XML: {}", source))] + ParseXmlError { source: quick_xml::Error }, + + #[snafu(display("Invalid XPath query '{}': {}", query, message))] + InvalidXPathQuery { query: String, message: String }, + + #[snafu(display( + "Access denied to channel '{}'. 
Administrator privileges may be required", + channel + ))] + AccessDeniedError { channel: String }, + + #[snafu(display("Channel '{}' not found", channel))] + ChannelNotFoundError { channel: String }, + + #[snafu(display("I/O error: {}", source))] + IoError { source: std::io::Error }, + + #[snafu(display("Event filtering error: {}", message))] + FilterError { message: String }, + + #[snafu(display("Configuration error: {}", message))] + ConfigError { message: String }, + + #[snafu(display("System resource exhausted: {}", message))] + ResourceExhaustedError { message: String }, + + #[snafu(display("Operation timeout after {} seconds", timeout_secs))] + TimeoutError { timeout_secs: u64 }, + + #[snafu(display("Failed to create render context: {}", source))] + CreateRenderContextError { source: windows::core::Error }, + + #[snafu(display("Failed to format message: {}", message))] + FormatMessageError { message: String }, + + #[snafu(display("Failed to render event: {}", message))] + RenderError { message: String }, + + #[snafu(display("Failed to create subscription: {}", source))] + SubscriptionError { source: windows::core::Error }, + + #[snafu(display("Failed to seek events: {}", source))] + SeekEventsError { source: windows::core::Error }, + + #[snafu(display("Failed to load publisher metadata for '{}': {}", provider, source))] + LoadPublisherMetadataError { + provider: String, + source: windows::core::Error, + }, + + #[snafu(display("Failed to pull events from channel '{}': {}", channel, source))] + PullEventsError { + channel: String, + source: windows::core::Error, + }, +} + +impl WindowsEventLogError { + /// Check if the error is recoverable and the operation should be retried. + pub const fn is_recoverable(&self) -> bool { + match self { + // Network/connection issues are typically recoverable + Self::QueryEventsError { .. } + | Self::ReadEventError { .. } + | Self::ResourceExhaustedError { .. } + | Self::TimeoutError { .. } + | Self::SeekEventsError { .. 
} + | Self::PullEventsError { .. } => true, + + // Configuration and permission issues are not recoverable + Self::OpenChannelError { .. } + | Self::CreateSubscriptionError { .. } + | Self::SubscriptionError { .. } + | Self::AccessDeniedError { .. } + | Self::ChannelNotFoundError { .. } + | Self::InvalidXPathQuery { .. } + | Self::ConfigError { .. } + | Self::CreateRenderContextError { .. } + | Self::LoadPublisherMetadataError { .. } => false, + + // Parsing errors might be recoverable depending on the specific error + Self::ParseXmlError { .. } + | Self::RenderMessageError { .. } + | Self::FormatMessageError { .. } + | Self::RenderError { .. } => false, + + // I/O errors could be temporary + Self::IoError { .. } => true, + + Self::FilterError { .. } => false, + } + } + + /// Get a user-friendly error message for logging. + pub fn user_message(&self) -> String { + match self { + Self::AccessDeniedError { channel } => { + format!( + "Access denied to event log channel '{}'. Try running Vector as Administrator.", + channel + ) + } + Self::ChannelNotFoundError { channel } => { + format!( + "Event log channel '{}' not found. Check the channel name and ensure the service is installed.", + channel + ) + } + Self::InvalidXPathQuery { query, .. } => { + format!("Invalid XPath query '{}'. Check the query syntax.", query) + } + Self::ResourceExhaustedError { .. } => { + "System resources exhausted. Consider reducing batch_size or poll_interval_secs." + .to_string() + } + Self::TimeoutError { timeout_secs } => { + format!( + "Operation timed out after {} seconds. 
Consider increasing timeout values.", + timeout_secs + ) + } + _ => self.to_string(), + } + } +} + +impl From<quick_xml::Error> for WindowsEventLogError { + fn from(error: quick_xml::Error) -> Self { + Self::ParseXmlError { source: error } + } +} + +// Bookmark persistence is handled via the checkpoint module (JSON-based) + +impl From<std::io::Error> for WindowsEventLogError { + fn from(error: std::io::Error) -> Self { + Self::IoError { source: error } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_error_recoverability() { + let recoverable_errors = vec![ + WindowsEventLogError::ResourceExhaustedError { + message: "test".to_string(), + }, + WindowsEventLogError::TimeoutError { timeout_secs: 30 }, + WindowsEventLogError::IoError { + source: std::io::Error::new(std::io::ErrorKind::TimedOut, "timeout"), + }, + ]; + + for error in recoverable_errors { + assert!( + error.is_recoverable(), + "Error should be recoverable: {}", + error + ); + } + + let non_recoverable_errors = vec![ + WindowsEventLogError::AccessDeniedError { + channel: "Security".to_string(), + }, + WindowsEventLogError::ChannelNotFoundError { + channel: "NonExistent".to_string(), + }, + WindowsEventLogError::InvalidXPathQuery { + query: "invalid".to_string(), + message: "syntax error".to_string(), + }, + WindowsEventLogError::ConfigError { + message: "invalid config".to_string(), + }, + ]; + + for error in non_recoverable_errors { + assert!( + !error.is_recoverable(), + "Error should not be recoverable: {}", + error + ); + } + } + + #[test] + fn test_user_messages() { + let error = WindowsEventLogError::AccessDeniedError { + channel: "Security".to_string(), + }; + assert!(error.user_message().contains("Administrator")); + + let error = WindowsEventLogError::ChannelNotFoundError { + channel: "NonExistent".to_string(), + }; + assert!(error.user_message().contains("not found")); + + let error = WindowsEventLogError::InvalidXPathQuery { + query: "*[invalid]".to_string(), + message: "syntax error".to_string(), + }; 
+ assert!(error.user_message().contains("XPath query")); + + let error = WindowsEventLogError::TimeoutError { timeout_secs: 30 }; + assert!(error.user_message().contains("timed out")); + } + + #[test] + fn test_error_conversions() { + let xml_error = quick_xml::Error::UnexpectedEof("test".to_string()); + let converted: WindowsEventLogError = xml_error.into(); + assert!(matches!( + converted, + WindowsEventLogError::ParseXmlError { .. } + )); + + let io_error = std::io::Error::new(std::io::ErrorKind::PermissionDenied, "test"); + let converted: WindowsEventLogError = io_error.into(); + assert!(matches!(converted, WindowsEventLogError::IoError { .. })); + } +} diff --git a/src/sources/windows_event_log/integration_tests.rs b/src/sources/windows_event_log/integration_tests.rs new file mode 100644 index 0000000000000..cd67bc9398c7e --- /dev/null +++ b/src/sources/windows_event_log/integration_tests.rs @@ -0,0 +1,1686 @@ +#![cfg(feature = "sources-windows_event_log-integration-tests")] +#![cfg(test)] + +use std::collections::HashSet; +use std::process::Command; +use std::time::Duration; + +use futures::StreamExt; +use tokio::fs; + +use super::*; +use crate::config::{SourceAcknowledgementsConfig, SourceConfig, SourceContext}; +use crate::test_util::components::run_and_assert_source_compliance; +use vector_lib::event::EventStatus; + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +/// Emit a test event into the Application log via `eventcreate.exe`. +/// +/// Each test should use a unique `source` name (e.g. `"VT_stress"`) to +/// prevent cross-test pollution when tests run in parallel. The source +/// name is used as the Provider/@Name in the event, which tests then +/// filter on via XPath. +/// +/// Requires administrator privileges. Panics with a clear message if +/// `eventcreate` is missing or the call fails. 
+fn emit_event(source: &str, event_type: &str, event_id: u32, description: &str) { + // Retry a few times because eventcreate can transiently fail with exit + // code 1 when multiple tests invoke it concurrently (registry contention). + let max_retries = 3; + for attempt in 0..=max_retries { + let status = Command::new("eventcreate") + .args([ + "/L", + "Application", + "/T", + event_type, + "/ID", + &event_id.to_string(), + "/SO", + source, + "/D", + description, + ]) + .status() + .unwrap_or_else(|e| { + panic!( + "failed to start eventcreate (error: {e}); \ + ensure it is on PATH and tests run as Administrator" + ) + }); + + if status.success() { + return; + } + + if attempt < max_retries { + std::thread::sleep(Duration::from_millis(200 * (attempt as u64 + 1))); + } else { + panic!( + "eventcreate exited with {status} after {max_retries} retries; \ + run tests as Administrator" + ); + } + } +} + +fn temp_data_dir() -> tempfile::TempDir { + tempfile::tempdir().expect("failed to create temp data_dir for test") +} + +/// XPath query that matches events from a specific test source. +fn test_query(source: &str) -> String { + format!("*[System[Provider[@Name='{source}'] and EventID=1000]]") +} + +/// Build a config targeting Application + a specific test source. +fn test_config(source: &str, data_dir: &std::path::Path) -> WindowsEventLogConfig { + WindowsEventLogConfig { + data_dir: Some(data_dir.to_path_buf()), + channels: vec!["Application".to_string()], + event_query: Some(test_query(source)), + read_existing_events: true, + batch_size: 100, + event_timeout_ms: 2000, + ..Default::default() + } +} + +// --------------------------------------------------------------------------- +// Basic ingestion +// --------------------------------------------------------------------------- + +/// Verify the source can subscribe and receive at least one event with all +/// expected top-level fields present. 
+#[tokio::test] +async fn test_basic_event_ingestion() { + let data_dir = temp_data_dir(); + emit_event("VT_basic", "INFORMATION", 1000, "basic ingestion test"); + + let config = WindowsEventLogConfig { + data_dir: Some(data_dir.path().to_path_buf()), + channels: vec!["System".to_string(), "Application".to_string()], + read_existing_events: true, + batch_size: 10, + ..Default::default() + }; + + let events = run_and_assert_source_compliance(config, Duration::from_secs(5), &[]).await; + assert!( + !events.is_empty(), + "Expected at least one event from System or Application, got 0. \ + Verify the Windows Event Log service is running." + ); + + let log = events[0].as_log(); + for field in [ + "timestamp", + "message", + "provider_name", + "channel", + "event_id", + "level", + ] { + assert!( + log.contains(field), + "Event is missing required field '{field}'. \ + Full event keys: {:?}", + log.keys().into_iter().flatten().collect::>() + ); + } +} + +// --------------------------------------------------------------------------- +// Drain loop / backlog handling +// --------------------------------------------------------------------------- + +/// Emit N events, verify all N arrive with no duplicates. +/// This exercises the EvtNext drain loop across multiple batches. +#[tokio::test] +async fn test_backlog_drain_no_duplicates() { + let data_dir = temp_data_dir(); + let n = 50; + + for i in 0..n { + emit_event( + "VT_backlog", + "INFORMATION", + 1000, + &format!("backlog-drain-test-event-{i}"), + ); + } + + let config = test_config("VT_backlog", data_dir.path()); + let events = run_and_assert_source_compliance(config, Duration::from_secs(10), &[]).await; + + assert!( + events.len() >= n, + "Expected at least {n} events, got {}. \ + The drain loop may not be exhausting the channel. 
\ + Check pull_events batch limit and signal management.", + events.len() + ); + + // Check for duplicates via record_id + let mut record_ids = HashSet::new(); + let mut duplicate_count = 0; + for event in &events { + if let Some(rid) = event.as_log().get("record_id") { + if !record_ids.insert(rid.to_string_lossy()) { + duplicate_count += 1; + } + } + } + assert_eq!( + duplicate_count, + 0, + "Found {duplicate_count} duplicate record_ids out of {} events. \ + Bookmark advancement or checkpoint logic may be broken.", + events.len() + ); +} + +// --------------------------------------------------------------------------- +// Checkpoint / resume +// --------------------------------------------------------------------------- + +/// Run the source, stop it, emit new events, run again with the same +/// data_dir. The second run should see ONLY the new events. +#[tokio::test] +async fn test_checkpoint_resume_no_redelivery() { + let data_dir = temp_data_dir(); + + // Phase 1: emit and consume + emit_event("VT_ckptres", "INFORMATION", 1000, "checkpoint-test-phase1"); + let config = test_config("VT_ckptres", data_dir.path()); + let first_run = run_and_assert_source_compliance(config, Duration::from_secs(5), &[]).await; + assert!( + !first_run.is_empty(), + "Phase 1 produced 0 events. Cannot test checkpoint resume." 
+ ); + // Let checkpoint flush to disk + tokio::time::sleep(Duration::from_secs(1)).await; + + // Phase 2: emit one more, reuse same data_dir + emit_event("VT_ckptres", "INFORMATION", 1000, "checkpoint-test-phase2"); + let config = test_config("VT_ckptres", data_dir.path()); + let second_run = run_and_assert_source_compliance(config, Duration::from_secs(5), &[]).await; + + // Phase 1 event should NOT be redelivered (checkpoint should have advanced past it) + let has_phase1 = second_run.iter().any(|e| { + e.as_log() + .get("message") + .map(|m| m.to_string_lossy().contains("checkpoint-test-phase1")) + .unwrap_or(false) + }); + assert!( + !has_phase1, + "Phase 1 event was redelivered in phase 2 — checkpoint did not advance. \ + Check checkpoint persistence in data_dir: {:?}", + data_dir.path() + ); + + // The new phase 2 event should be present + let has_phase2 = second_run.iter().any(|e| { + e.as_log() + .get("message") + .map(|m| m.to_string_lossy().contains("checkpoint-test-phase2")) + .unwrap_or(false) + }); + assert!( + has_phase2, + "Phase 2 event not found in second run — checkpoint may have advanced past it. \ + Got {} events.", + second_run.len() + ); +} + +// --------------------------------------------------------------------------- +// Channel filtering +// --------------------------------------------------------------------------- + +/// Subscribe to System only, verify no Application events leak through. 
+#[tokio::test] +async fn test_channel_isolation() { + let data_dir = temp_data_dir(); + emit_event("VT_chaniso", "INFORMATION", 1000, "channel isolation test"); // goes to Application + + let config = WindowsEventLogConfig { + data_dir: Some(data_dir.path().to_path_buf()), + channels: vec!["System".to_string()], + read_existing_events: true, + batch_size: 20, + ..Default::default() + }; + + let events = run_and_assert_source_compliance(config, Duration::from_secs(3), &[]).await; + + for event in &events { + if let Some(channel) = event.as_log().get("channel") { + let ch = channel.to_string_lossy(); + assert_eq!( + ch, "System", + "Got event from channel '{ch}' but only subscribed to System. \ + Channel filtering in EvtSubscribe may be broken." + ); + } + } +} + +// --------------------------------------------------------------------------- +// Event ID filtering +// --------------------------------------------------------------------------- + +/// Verify only_event_ids includes only matching events. +/// Note: eventcreate.exe only supports IDs 1-1000, so we use 999 and 1000. 
+#[tokio::test] +async fn test_only_event_ids_filter() { + let data_dir = temp_data_dir(); + emit_event("VT_onlyid", "INFORMATION", 999, "only-filter-exclude"); + emit_event("VT_onlyid", "INFORMATION", 1000, "only-filter-include"); + + let config = WindowsEventLogConfig { + data_dir: Some(data_dir.path().to_path_buf()), + channels: vec!["Application".to_string()], + read_existing_events: true, + only_event_ids: Some(vec![1000]), + event_query: Some("*[System[Provider[@Name='VT_onlyid']]]".to_string()), + ..Default::default() + }; + + let events = run_and_assert_source_compliance(config, Duration::from_secs(5), &[]).await; + + for event in &events { + if let Some(eid) = event.as_log().get("event_id") { + let id: i64 = match eid { + vrl::value::Value::Integer(i) => *i, + other => other.to_string_lossy().parse().unwrap_or(-1), + }; + assert_eq!( + id, 1000, + "only_event_ids=[1000] but got event_id={id}. \ + Event ID filtering in parse_event_xml may be broken." + ); + } + } +} + +/// Verify ignore_event_ids excludes matching events. +#[tokio::test] +async fn test_ignore_event_ids_filter() { + let data_dir = temp_data_dir(); + emit_event("VT_ignid", "INFORMATION", 1000, "ignore-filter-test"); + + let config = WindowsEventLogConfig { + data_dir: Some(data_dir.path().to_path_buf()), + channels: vec!["Application".to_string()], + read_existing_events: true, + ignore_event_ids: vec![1000], + event_query: Some("*[System[Provider[@Name='VT_ignid']]]".to_string()), + ..Default::default() + }; + + let events = run_and_assert_source_compliance(config, Duration::from_secs(3), &[]).await; + + for event in &events { + if let Some(eid) = event.as_log().get("event_id") { + let id: i64 = match eid { + vrl::value::Value::Integer(i) => *i, + other => other.to_string_lossy().parse().unwrap_or(-1), + }; + assert_ne!( + id, 1000, + "ignore_event_ids=[1000] but event_id=1000 was not filtered. \ + Check ignore_event_ids logic in parse_event_xml." 
+ ); + } + } +} + +/// Verify that only_event_ids generates an XPath filter when no explicit +/// event_query is set. The existing `test_only_event_ids_filter` always sets +/// both `only_event_ids` AND `event_query`, so the auto-generated XPath path +/// in `build_xpath_query()` was never exercised — that is how the original +/// performance bug shipped. +/// +/// This test sets only_event_ids=[1000] WITHOUT event_query, so the source +/// must auto-generate `*[System[EventID=1000]]` and only receive matching +/// events from the Windows API. +#[tokio::test] +async fn test_only_event_ids_generates_xpath_filter() { + let data_dir = temp_data_dir(); + + // Emit events with different IDs. Only ID 1000 should be returned. + emit_event("VT_xpathid", "INFORMATION", 999, "xpath-filter-exclude"); + emit_event("VT_xpathid", "INFORMATION", 1000, "xpath-filter-include"); + emit_event("VT_xpathid", "INFORMATION", 998, "xpath-filter-exclude-2"); + + let config = WindowsEventLogConfig { + data_dir: Some(data_dir.path().to_path_buf()), + channels: vec!["Application".to_string()], + read_existing_events: true, + only_event_ids: Some(vec![1000]), + // Intentionally NOT setting event_query — this forces + // build_xpath_query() to auto-generate the XPath from only_event_ids. + ..Default::default() + }; + + let events = run_and_assert_source_compliance(config, Duration::from_secs(5), &[]).await; + + // We must receive at least one event (the 1000 we emitted). + assert!( + !events.is_empty(), + "Expected at least one event with ID 1000 from XPath-filtered subscription" + ); + + // Every event must have event_id == 1000. + for event in &events { + if let Some(eid) = event.as_log().get("event_id") { + let id: i64 = match eid { + vrl::value::Value::Integer(i) => *i, + other => other.to_string_lossy().parse().unwrap_or(-1), + }; + assert_eq!( + id, 1000, + "only_event_ids=[1000] (without event_query) but got event_id={id}. \ + XPath generation in build_xpath_query may be broken." 
+ ); + } + } +} + +// --------------------------------------------------------------------------- +// Event level / type variety +// --------------------------------------------------------------------------- + +/// eventcreate supports INFORMATION, WARNING, ERROR. Verify all three produce +/// events with correct level names. +#[tokio::test] +async fn test_multiple_event_levels() { + let data_dir = temp_data_dir(); + emit_event("VT_levels", "INFORMATION", 1000, "level-test-info"); + emit_event("VT_levels", "WARNING", 1000, "level-test-warn"); + emit_event("VT_levels", "ERROR", 1000, "level-test-error"); + + let config = test_config("VT_levels", data_dir.path()); + let events = run_and_assert_source_compliance(config, Duration::from_secs(5), &[]).await; + + let mut levels_seen: HashSet = HashSet::new(); + for event in &events { + if let Some(level) = event.as_log().get("level") { + levels_seen.insert(level.to_string_lossy().to_string()); + } + } + + for expected in ["Information", "Warning", "Error"] { + assert!( + levels_seen.contains(expected), + "Expected level '{expected}' in output but only saw: {levels_seen:?}. \ + level_name() mapping or EvtRender may not be extracting Level correctly." + ); + } +} + +// --------------------------------------------------------------------------- +// Rendered message +// --------------------------------------------------------------------------- + +/// With render_message enabled (the default), the message field should contain +/// the actual event description, not the generic fallback. 
+#[tokio::test] +async fn test_rendered_message_content() { + let data_dir = temp_data_dir(); + let marker = "rendered-message-test-unique-string-12345"; + emit_event("VT_render", "INFORMATION", 1000, marker); + + let mut config = test_config("VT_render", data_dir.path()); + config.render_message = true; + + let events = run_and_assert_source_compliance(config, Duration::from_secs(5), &[]).await; + + let found = events.iter().any(|e| { + e.as_log() + .get("message") + .map(|m| m.to_string_lossy().contains(marker)) + .unwrap_or(false) + }); + + assert!( + found, + "render_message=true but no event message contains the marker '{marker}'. \ + EvtFormatMessage may be failing or the message field is using the generic fallback. \ + Got {} events, first message: {:?}", + events.len(), + events + .first() + .and_then(|e| e.as_log().get("message")) + .map(|m| m.to_string_lossy()) + ); +} + +/// With render_message disabled, events should still have a message field +/// (the generic fallback). +#[tokio::test] +async fn test_render_message_disabled_fallback() { + let data_dir = temp_data_dir(); + emit_event("VT_noren", "INFORMATION", 1000, "render disabled test"); + + let mut config = test_config("VT_noren", data_dir.path()); + config.render_message = false; + + let events = run_and_assert_source_compliance(config, Duration::from_secs(5), &[]).await; + + if !events.is_empty() { + let log = events[0].as_log(); + assert!( + log.contains("message"), + "render_message=false should still produce a message field (generic fallback). 
\ + Event keys: {:?}", + log.keys().into_iter().flatten().collect::>() + ); + } +} + +// --------------------------------------------------------------------------- +// XML inclusion +// --------------------------------------------------------------------------- + +#[tokio::test] +async fn test_include_xml_well_formed() { + let data_dir = temp_data_dir(); + emit_event("VT_xmlinc", "INFORMATION", 1000, "xml inclusion test"); + + let mut config = test_config("VT_xmlinc", data_dir.path()); + config.include_xml = true; + + let events = run_and_assert_source_compliance(config, Duration::from_secs(5), &[]).await; + assert!( + !events.is_empty(), + "Got 0 events, cannot verify XML inclusion." + ); + + let log = events[0].as_log(); + let xml = log.get("xml").expect( + "include_xml=true but 'xml' field missing. \ + Check raw_xml population in parse_event_xml.", + ); + let xml_str = xml.to_string_lossy(); + assert!( + xml_str.contains(""), + "XML field should contain well-formed ..., got: {}", + &xml_str[..xml_str.len().min(200)] + ); +} + +/// When include_xml is false (default), no xml field should be present. +#[tokio::test] +async fn test_exclude_xml_by_default() { + let data_dir = temp_data_dir(); + emit_event("VT_xmlexc", "INFORMATION", 1000, "xml exclusion test"); + + let mut config = test_config("VT_xmlexc", data_dir.path()); + config.include_xml = false; + + let events = run_and_assert_source_compliance(config, Duration::from_secs(5), &[]).await; + + for event in &events { + assert!( + !event.as_log().contains("xml"), + "include_xml=false but 'xml' field is present." 
+ ); + } +} + +// --------------------------------------------------------------------------- +// Field filtering +// --------------------------------------------------------------------------- + +#[tokio::test] +async fn test_field_filter_exclude_event_data() { + let data_dir = temp_data_dir(); + + let config = WindowsEventLogConfig { + data_dir: Some(data_dir.path().to_path_buf()), + channels: vec!["System".to_string()], + read_existing_events: true, + field_filter: FieldFilter { + include_system_fields: true, + include_event_data: false, + include_user_data: false, + ..Default::default() + }, + ..Default::default() + }; + + let events = run_and_assert_source_compliance(config, Duration::from_secs(3), &[]).await; + + for event in events.iter().take(10) { + let log = event.as_log(); + assert!( + !log.contains("event_data"), + "include_event_data=false but 'event_data' field is present." + ); + assert!( + !log.contains("user_data"), + "include_user_data=false but 'user_data' field is present." + ); + } +} + +// --------------------------------------------------------------------------- +// Resilience +// --------------------------------------------------------------------------- + +/// Short timeouts should not crash or panic. +#[tokio::test] +async fn test_short_timeouts_no_crash() { + let data_dir = temp_data_dir(); + let config = WindowsEventLogConfig { + data_dir: Some(data_dir.path().to_path_buf()), + channels: vec!["System".to_string(), "Application".to_string()], + connection_timeout_secs: 5, + event_timeout_ms: 500, + batch_size: 5, + ..Default::default() + }; + + // If this panics, the test fails with a clear backtrace. + let _events = run_and_assert_source_compliance(config, Duration::from_secs(2), &[]).await; +} + +/// Invalid channel name should not crash — the source should skip it or +/// return a clear error. 
+#[tokio::test] +async fn test_nonexistent_channel_graceful_handling() { + let data_dir = temp_data_dir(); + let config = WindowsEventLogConfig { + data_dir: Some(data_dir.path().to_path_buf()), + channels: vec![ + "Application".to_string(), + "ThisChannelDoesNotExist12345".to_string(), + ], + event_timeout_ms: 2000, + ..Default::default() + }; + + // Should not panic. May produce events from Application only, or may + // error on the bad channel — either is acceptable. + let _result = run_and_assert_source_compliance(config, Duration::from_secs(3), &[]).await; +} + +// --------------------------------------------------------------------------- +// Event structure completeness +// --------------------------------------------------------------------------- + +/// Verify all Windows Event Log fields that SOC/SIEM analysts depend on are +/// present and have reasonable values. +#[tokio::test] +async fn test_event_field_completeness() { + let data_dir = temp_data_dir(); + emit_event("VT_fields", "INFORMATION", 1000, "field completeness test"); + + let mut config = test_config("VT_fields", data_dir.path()); + config.include_xml = true; + + let events = run_and_assert_source_compliance(config, Duration::from_secs(5), &[]).await; + assert!( + !events.is_empty(), + "Got 0 events, cannot verify field completeness." + ); + + let log = events[0].as_log(); + + // Fields that must always be present + let required = [ + "timestamp", + "message", + "event_id", + "level", + "level_value", + "channel", + "provider_name", + "computer", + "record_id", + "process_id", + "thread_id", + ]; + + let mut missing = Vec::new(); + for field in &required { + if !log.contains(*field) { + missing.push(*field); + } + } + + assert!( + missing.is_empty(), + "Event is missing required fields: {missing:?}. \ + Present fields: {:?}. 
\ + This breaks SOC/SIEM ingestion pipelines that depend on these fields.", + log.keys().into_iter().flatten().collect::>() + ); + + // Verify event_id is a positive integer + if let Some(eid) = log.get("event_id") { + match eid { + vrl::value::Value::Integer(i) => { + assert!(*i > 0, "event_id should be a positive integer, got {i}") + } + other => panic!( + "event_id should be an integer, got: {other:?}. \ + Check parser set_windows_fields." + ), + } + } + + // Verify record_id is a positive integer + if let Some(rid) = log.get("record_id") { + match rid { + vrl::value::Value::Integer(i) => { + assert!(*i > 0, "record_id should be a positive integer, got {i}") + } + other => panic!("record_id should be an integer, got: {other:?}."), + } + } + + // Verify level is a human-readable string + if let Some(level) = log.get("level") { + let level_str = level.to_string_lossy(); + assert!( + ["Information", "Warning", "Error", "Critical", "Verbose"] + .contains(&level_str.as_ref()), + "level should be a human-readable name, got '{level_str}'. \ + Check level_name() mapping." + ); + } +} + +// --------------------------------------------------------------------------- +// Rate limiting +// --------------------------------------------------------------------------- + +/// With events_per_second set, events should still arrive but the source +/// should not exceed the configured rate over a sustained period. 
+#[tokio::test] +async fn test_rate_limiting() { + let data_dir = temp_data_dir(); + + // Emit a burst of events + for i in 0..20 { + emit_event( + "VT_rate", + "INFORMATION", + 1000, + &format!("rate-limit-test-{i}"), + ); + } + + let config = WindowsEventLogConfig { + data_dir: Some(data_dir.path().to_path_buf()), + channels: vec!["Application".to_string()], + event_query: Some(test_query("VT_rate")), + read_existing_events: true, + events_per_second: 50, + batch_size: 100, + event_timeout_ms: 2000, + ..Default::default() + }; + + let events = run_and_assert_source_compliance(config, Duration::from_secs(5), &[]).await; + + // With rate limiting enabled, we should still get events — the limiter + // throttles batch throughput, not total count over the run duration. + assert!( + !events.is_empty(), + "events_per_second=50 should still produce events, got 0. \ + Rate limiter may be blocking all batches." + ); +} + +// --------------------------------------------------------------------------- +// Event data truncation +// --------------------------------------------------------------------------- + +/// With max_event_data_length set, long event data values should be truncated. +#[tokio::test] +async fn test_event_data_truncation() { + let data_dir = temp_data_dir(); + + // eventcreate puts the description into the event message, not EventData. + // We verify truncation indirectly: the source should not crash and events + // should still arrive with the field present. + let long_desc = "A".repeat(500); + emit_event("VT_trunc", "INFORMATION", 1000, &long_desc); + + let mut config = test_config("VT_trunc", data_dir.path()); + config.max_event_data_length = 100; + config.include_xml = false; + + let events = run_and_assert_source_compliance(config, Duration::from_secs(5), &[]).await; + + assert!( + !events.is_empty(), + "max_event_data_length=100 should not prevent event ingestion." 
+ ); +} + +// --------------------------------------------------------------------------- +// Max event age filtering +// --------------------------------------------------------------------------- + +/// With max_event_age_secs set to a very low value, old events should be +/// filtered out. +#[tokio::test] +async fn test_max_event_age_filtering() { + let data_dir = temp_data_dir(); + + // Emit event, then configure a very short max age so it's already "old" + // by the time we read it. + emit_event("VT_maxage", "INFORMATION", 1000, "age-filter-test"); + + // Sleep so the event ages past the max_event_age_secs threshold. + // Use a generous buffer to avoid flakes from clock jitter on slow CI. + tokio::time::sleep(Duration::from_secs(5)).await; + + let config = WindowsEventLogConfig { + data_dir: Some(data_dir.path().to_path_buf()), + channels: vec!["Application".to_string()], + event_query: Some(test_query("VT_maxage")), + read_existing_events: true, + max_event_age_secs: Some(3), // 3 seconds — our event is already ~5s old + event_timeout_ms: 2000, + ..Default::default() + }; + + // This may produce 0 events (filtered) or some events from other sources. + // The key assertion: the source should not crash. + let events = run_and_assert_source_compliance(config, Duration::from_secs(3), &[]).await; + + // If we got events, verify none of them are our old test event + for event in &events { + if let Some(msg) = event.as_log().get("message") { + assert!( + !msg.to_string_lossy().contains("age-filter-test"), + "max_event_age_secs=3 but old event was not filtered out. \ + Check age filtering in build_event." + ); + } + } +} + +// --------------------------------------------------------------------------- +// Event data format coercion +// --------------------------------------------------------------------------- + +/// With event_data_format configured, specific fields should be coerced +/// to the requested type. 
+#[tokio::test] +async fn test_event_data_format_coercion() { + let data_dir = temp_data_dir(); + emit_event("VT_format", "INFORMATION", 1000, "format coercion test"); + + let mut config = test_config("VT_format", data_dir.path()); + config.field_filter.include_event_data = true; + // event_id is a system field set as Integer by the parser, so test + // that event_data_format can convert it to a string + config.event_data_format.insert( + "event_id".to_string(), + super::config::EventDataFormat::String, + ); + + let events = run_and_assert_source_compliance(config, Duration::from_secs(5), &[]).await; + + assert!( + !events.is_empty(), + "event_data_format config should not prevent event ingestion." + ); + + // event_id should be converted to string by the custom formatter + let log = events[0].as_log(); + if let Some(eid) = log.get("event_id") { + assert!( + matches!(eid, vrl::value::Value::Bytes(_)), + "event_data_format set event_id to String but got {:?}. \ + Check apply_custom_formatting in parser.", + eid + ); + } +} + +// --------------------------------------------------------------------------- +// Multi-channel simultaneous ingestion +// --------------------------------------------------------------------------- + +/// Subscribe to both System and Application, verify events arrive from +/// both channels. 
+#[tokio::test] +async fn test_multi_channel_ingestion() { + let data_dir = temp_data_dir(); + emit_event("VT_multi", "INFORMATION", 1000, "multi channel test"); // Goes to Application + + let config = WindowsEventLogConfig { + data_dir: Some(data_dir.path().to_path_buf()), + channels: vec!["System".to_string(), "Application".to_string()], + read_existing_events: true, + batch_size: 50, + event_timeout_ms: 2000, + ..Default::default() + }; + + let events = run_and_assert_source_compliance(config, Duration::from_secs(5), &[]).await; + + let mut channels_seen: HashSet = HashSet::new(); + for event in &events { + if let Some(channel) = event.as_log().get("channel") { + channels_seen.insert(channel.to_string_lossy().to_string()); + } + } + + // System always has events on any running Windows machine + assert!( + channels_seen.contains("System"), + "Subscribed to System but got no System events. \ + Channels seen: {channels_seen:?}" + ); + assert!( + channels_seen.contains("Application"), + "Subscribed to Application and emitted a test event but got no Application events. \ + Channels seen: {channels_seen:?}" + ); +} + +// --------------------------------------------------------------------------- +// Error path / metrics compliance +// --------------------------------------------------------------------------- + +/// When ALL channels are invalid, the source should exit gracefully +/// without panicking and produce no events. +#[tokio::test] +async fn test_all_channels_invalid_no_panic() { + let data_dir = temp_data_dir(); + let config = WindowsEventLogConfig { + data_dir: Some(data_dir.path().to_path_buf()), + channels: vec!["ThisChannelDoesNotExist99999".to_string()], + event_timeout_ms: 1000, + ..Default::default() + }; + + // Build and run the source directly — don't use run_and_assert_source_compliance + // since with 0 valid channels we expect 0 events and no compliance metrics. 
+ let (tx, _rx) = SourceSender::new_test(); + let cx = SourceContext::new_test(tx, None); + let source = config.build(cx).await.expect("source should build"); + + let timeout = tokio::time::timeout(Duration::from_secs(3), source).await; + + // Source should complete (Ok or Err) within the timeout, not hang. + assert!( + timeout.is_ok(), + "Source with all invalid channels should exit promptly, not hang." + ); +} + +/// Verify that when events are successfully ingested, the standard +/// component metrics (component_received_events_total, +/// component_received_bytes_total, etc.) are emitted correctly. +/// This is the happy-path compliance check with explicit metric verification. +#[tokio::test] +async fn test_source_compliance_metrics() { + let data_dir = temp_data_dir(); + emit_event("VT_comply", "INFORMATION", 1000, "compliance metrics test"); + + let config = WindowsEventLogConfig { + data_dir: Some(data_dir.path().to_path_buf()), + channels: vec!["Application".to_string()], + read_existing_events: true, + batch_size: 50, + event_timeout_ms: 2000, + ..Default::default() + }; + + // run_and_assert_source_compliance validates: + // - BytesReceived, EventsReceived, EventsSent internal events + // - component_received_bytes_total (tagged with protocol) + // - component_received_events_total + // - component_received_event_bytes_total + // - component_sent_events_total + // - component_sent_event_bytes_total + let events = run_and_assert_source_compliance(config, Duration::from_secs(5), &[]).await; + + assert!( + !events.is_empty(), + "Compliance test requires at least one event to validate metrics." 
+ ); +} + +// --------------------------------------------------------------------------- +// Security validation +// --------------------------------------------------------------------------- + +/// Wildcard channel patterns must be rejected at config validation time, +/// not passed to EvtSubscribe where they can cause heap corruption with +/// many matching channels. +#[tokio::test] +async fn test_wildcard_channels_rejected() { + let wildcards = vec!["Microsoft-Windows-*", "*", "System?", "[Ss]ystem"]; + + for pattern in wildcards { + let config = WindowsEventLogConfig { + channels: vec![pattern.to_string()], + ..Default::default() + }; + + let result = config.validate(); + assert!( + result.is_err(), + "Wildcard pattern '{pattern}' should be rejected by config validation." + ); + let err = result.unwrap_err().to_string(); + assert!( + err.contains("wildcard"), + "Error for '{pattern}' should mention wildcards, got: {err}" + ); + } +} + +/// XPath injection attempts must be rejected at config validation time. +#[tokio::test] +async fn test_xpath_injection_rejected() { + let attacks = vec![ + "javascript:alert('xss')", + "*[javascript:eval('code')]", + "file:///etc/passwd", + "", + ]; + + for attack in attacks { + let config = WindowsEventLogConfig { + channels: vec!["System".to_string()], + event_query: Some(attack.to_string()), + ..Default::default() + }; + + let result = config.validate(); + assert!( + result.is_err(), + "XPath injection '{attack}' should be rejected by config validation." + ); + } +} + +/// Channel names with control characters or null bytes must be rejected. 
+#[tokio::test]
+async fn test_dangerous_channel_names_rejected() {
+    let dangerous = vec!["System\0", "System\r\nEvil", "System\n"];
+
+    for name in dangerous {
+        let config = WindowsEventLogConfig {
+            channels: vec![name.to_string()],
+            ..Default::default()
+        };
+
+        let result = config.validate();
+        // escape_debug renders the embedded control characters visibly
+        // in the failure message instead of corrupting test output.
+        assert!(
+            result.is_err(),
+            "Dangerous channel name '{}' should be rejected.",
+            name.escape_debug()
+        );
+    }
+}
+
+/// Unbalanced XPath brackets/parentheses must be rejected.
+#[tokio::test]
+async fn test_unbalanced_xpath_rejected() {
+    let unbalanced = vec![
+        "*[System[Level=1]",   // missing closing ]
+        "*[System[(Level=1]]", // mismatched
+    ];
+
+    for query in &unbalanced {
+        let config = WindowsEventLogConfig {
+            channels: vec!["System".to_string()],
+            event_query: Some(query.to_string()),
+            ..Default::default()
+        };
+
+        let result = config.validate();
+        assert!(
+            result.is_err(),
+            "Unbalanced XPath '{query}' should be rejected."
+        );
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Acknowledgement / checkpoint integrity
+// ---------------------------------------------------------------------------
+
+/// With acknowledgements enabled, checkpoints should only advance after
+/// events are delivered downstream. This is the at-least-once guarantee:
+/// if Vector crashes before the sink acks, the checkpoint hasn't moved,
+/// so events are re-read on restart.
+///
+/// Test approach: run source with acks enabled and EventStatus::Delivered,
+/// then restart with the same data_dir — the second run should skip
+/// already-delivered events (proving checkpoint advanced after ack).
+#[tokio::test] +async fn test_acknowledgements_checkpoint_after_delivery() { + let data_dir = temp_data_dir(); + + // Phase 1: emit event, run with acks enabled + Delivered status + emit_event("VT_ackdel", "INFORMATION", 1000, "ack-test-phase1"); + + { + let (tx, mut rx) = SourceSender::new_test_finalize(EventStatus::Delivered); + let config = WindowsEventLogConfig { + data_dir: Some(data_dir.path().to_path_buf()), + channels: vec!["Application".to_string()], + event_query: Some(test_query("VT_ackdel")), + read_existing_events: true, + batch_size: 100, + event_timeout_ms: 2000, + acknowledgements: SourceAcknowledgementsConfig::from(true), + ..Default::default() + }; + + let cx = SourceContext::new_test(tx, None); + let source = config.build(cx).await.expect("source should build"); + + let handle = tokio::spawn(source); + + // Collect events for a few seconds + let mut event_count = 0; + let deadline = tokio::time::Instant::now() + Duration::from_secs(5); + loop { + tokio::select! { + event = rx.next() => { + if event.is_some() { + event_count += 1; + } else { + break; + } + } + _ = tokio::time::sleep_until(deadline) => { + break; + } + } + } + + // Abort the source (simulates shutdown) + handle.abort(); + let _ = handle.await; + + assert!( + event_count > 0, + "Phase 1 with acks=true should produce events." + ); + } + + // Wait for checkpoint flush + tokio::time::sleep(Duration::from_secs(1)).await; + + // Verify checkpoint file exists + // Note: SourceContext::new_test uses ComponentKey "default", and + // resolve_and_make_data_subdir appends the component ID as a subdirectory. + let checkpoint_path = data_dir + .path() + .join("default") + .join("windows_event_log_checkpoints.json"); + assert!( + checkpoint_path.exists(), + "Checkpoint file should exist after acknowledged delivery. 
\ + Path: {:?}", + checkpoint_path + ); + + // Phase 2: emit a NEW event, run with same data_dir + emit_event("VT_ackdel", "INFORMATION", 1000, "ack-test-phase2"); + let config = test_config("VT_ackdel", data_dir.path()); + let events = run_and_assert_source_compliance(config, Duration::from_secs(5), &[]).await; + + // Should NOT see phase1 event again (checkpoint advanced) + let has_phase1 = events.iter().any(|e| { + e.as_log() + .get("message") + .map(|m| m.to_string_lossy().contains("ack-test-phase1")) + .unwrap_or(false) + }); + assert!( + !has_phase1, + "Phase 1 events should not be redelivered after acknowledgement. \ + Checkpoint may not have advanced after ack." + ); +} + +// --------------------------------------------------------------------------- +// Checkpoint corruption recovery +// --------------------------------------------------------------------------- + +/// If the checkpoint file is corrupted (e.g., power loss mid-write), +/// the source should start fresh gracefully rather than crash-loop. +/// This tests the atomic-write recovery path. +#[tokio::test] +async fn test_checkpoint_corruption_recovery() { + let data_dir = temp_data_dir(); + + // Write garbage to the checkpoint file. + // Note: SourceContext::new_test uses ComponentKey "default", and + // resolve_and_make_data_subdir appends the component ID as a subdirectory. + let checkpoint_dir = data_dir.path().join("default"); + fs::create_dir_all(&checkpoint_dir) + .await + .expect("should be able to create checkpoint directory"); + let checkpoint_path = checkpoint_dir.join("windows_event_log_checkpoints.json"); + fs::write(&checkpoint_path, b"{{{{corrupted json garbage!!! 
\x00\xff") + .await + .expect("should be able to write corrupted checkpoint"); + + // Emit a test event + emit_event( + "VT_corrupt", + "INFORMATION", + 1000, + "corruption recovery test", + ); + + let config = WindowsEventLogConfig { + data_dir: Some(data_dir.path().to_path_buf()), + channels: vec!["Application".to_string()], + event_query: Some(test_query("VT_corrupt")), + read_existing_events: true, + batch_size: 100, + event_timeout_ms: 2000, + ..Default::default() + }; + + // Source should start despite corrupted checkpoint and read events + let events = run_and_assert_source_compliance(config, Duration::from_secs(5), &[]).await; + + assert!( + !events.is_empty(), + "Source should recover from corrupted checkpoint and ingest events. \ + Got 0 events — checkpoint corruption may be causing a crash." + ); +} + +// --------------------------------------------------------------------------- +// Rejected acknowledgement — checkpoint must NOT advance +// --------------------------------------------------------------------------- + +/// With acknowledgements enabled and EventStatus::Rejected, checkpoints +/// should NOT advance. This is the other half of at-least-once: if the +/// sink rejects events, the source must re-read them on restart. 
+#[tokio::test] +async fn test_rejected_ack_does_not_advance_checkpoint() { + let data_dir = temp_data_dir(); + + // Emit a distinctive event + emit_event("VT_rejack", "INFORMATION", 1000, "rejected-ack-test-marker"); + + // Phase 1: Run with acks enabled but Rejected status — checkpoint should NOT advance + { + let (tx, mut rx) = SourceSender::new_test_finalize(EventStatus::Rejected); + let config = WindowsEventLogConfig { + data_dir: Some(data_dir.path().to_path_buf()), + channels: vec!["Application".to_string()], + event_query: Some(test_query("VT_rejack")), + read_existing_events: true, + batch_size: 100, + event_timeout_ms: 2000, + acknowledgements: SourceAcknowledgementsConfig::from(true), + ..Default::default() + }; + + let cx = SourceContext::new_test(tx, None); + let source = config.build(cx).await.expect("source should build"); + let handle = tokio::spawn(source); + + // Drain events for a few seconds + let mut phase1_count = 0; + let deadline = tokio::time::Instant::now() + Duration::from_secs(5); + loop { + tokio::select! { + event = rx.next() => { + if event.is_some() { + phase1_count += 1; + } else { + break; + } + } + _ = tokio::time::sleep_until(deadline) => { + break; + } + } + } + + handle.abort(); + let _ = handle.await; + + assert!( + phase1_count > 0, + "Phase 1 should produce events even with Rejected status." 
+ ); + } + + tokio::time::sleep(Duration::from_secs(1)).await; + + // Phase 2: Run again with same data_dir — should see the SAME events + // because checkpoint should not have advanced after rejection + let config = test_config("VT_rejack", data_dir.path()); + let events = run_and_assert_source_compliance(config, Duration::from_secs(5), &[]).await; + + let has_marker = events.iter().any(|e| { + let log = e.as_log(); + // Check message field (rendered message or string_inserts fallback) + let in_message = log + .get("message") + .map(|m| m.to_string_lossy().contains("rejected-ack-test-marker")) + .unwrap_or(false); + // Also check string_inserts directly in case EvtFormatMessage is unavailable + // on this CI runner and the fallback doesn't surface the description. + let in_inserts = log + .get("string_inserts") + .and_then(|v| v.as_array()) + .map(|arr| { + arr.iter() + .any(|v| v.to_string_lossy().contains("rejected-ack-test-marker")) + }) + .unwrap_or(false); + in_message || in_inserts + }); + assert!( + has_marker, + "Events should be redelivered after rejected acknowledgement. \ + Checkpoint may have advanced despite rejection — at-least-once violated. \ + Got {} events in phase 2.", + events.len() + ); +} + +// --------------------------------------------------------------------------- +// Concurrent stress test +// --------------------------------------------------------------------------- + +/// Emit a burst of events and verify all arrive without drops or corruption. +/// Exercises buffer resizing, batch draining, and checkpoint batching under +/// heavier load than the basic backlog test. 
+#[tokio::test] +async fn test_stress_burst_ingestion() { + let data_dir = temp_data_dir(); + let n = 200; + + for i in 0..n { + emit_event( + "VT_stress", + "INFORMATION", + 1000, + &format!("stress-test-event-{i}"), + ); + } + + let config = WindowsEventLogConfig { + data_dir: Some(data_dir.path().to_path_buf()), + channels: vec!["Application".to_string()], + event_query: Some(test_query("VT_stress")), + read_existing_events: true, + batch_size: 50, // Multiple batches required + event_timeout_ms: 2000, + ..Default::default() + }; + + let events = run_and_assert_source_compliance(config, Duration::from_secs(15), &[]).await; + + assert!( + events.len() >= n, + "Expected at least {n} events under burst load, got {}. \ + Drain loop may be exiting early or losing events under pressure.", + events.len() + ); + + // Verify no duplicates + let mut record_ids = HashSet::new(); + let mut dups = 0; + for event in &events { + if let Some(rid) = event.as_log().get("record_id") { + if !record_ids.insert(rid.to_string_lossy()) { + dups += 1; + } + } + } + assert_eq!( + dups, 0, + "Found {dups} duplicate record_ids in {n}-event stress test." + ); + + // Verify no event has empty/missing critical fields (corruption check) + for event in events.iter().take(50) { + let log = event.as_log(); + for field in ["event_id", "record_id", "channel", "provider_name"] { + assert!( + log.contains(field), + "Stress test event missing field '{field}' — possible render corruption." + ); + } + } +} + +// --------------------------------------------------------------------------- +// Resubscribe after log clear +// --------------------------------------------------------------------------- + +/// Helper: write an event to a custom log channel via PowerShell Write-EventLog. 
+fn write_custom_log_event(log_name: &str, source: &str, event_id: u32, message: &str) { + let status = Command::new("powershell") + .args([ + "-NoProfile", + "-Command", + &format!( + "Write-EventLog -LogName '{}' -Source '{}' -EventId {} -EntryType Information -Message '{}'", + log_name, source, event_id, message + ), + ]) + .status() + .expect("failed to run powershell Write-EventLog"); + assert!(status.success(), "Write-EventLog failed with {status}"); +} + +/// Clear a dedicated custom event log mid-run, verify the source recovers +/// via resubscription and continues ingesting new events. +/// +/// Uses a temporary custom log channel (created via PowerShell New-EventLog) +/// instead of Application, so clearing it doesn't destroy events that other +/// parallel tests depend on. +/// +/// Requires Administrator privileges. +#[tokio::test] +async fn test_resubscribe_after_log_clear() { + let log_name = "VectorTestResub"; + let source_name = "VT_resub"; + + // Create dedicated log channel for this test + let create_result = Command::new("powershell") + .args([ + "-NoProfile", + "-Command", + &format!( + "if (-not [System.Diagnostics.EventLog]::SourceExists('{source_name}')) {{ \ + New-EventLog -LogName '{log_name}' -Source '{source_name}' \ + }}" + ), + ]) + .status(); + + match create_result { + Ok(status) if status.success() => {} + _ => { + // Can't create custom log — skip gracefully + return; + } + } + + // Ensure cleanup on all exit paths + struct CleanupGuard { + log_name: &'static str, + } + impl Drop for CleanupGuard { + fn drop(&mut self) { + let _ = Command::new("powershell") + .args([ + "-NoProfile", + "-Command", + &format!( + "Remove-EventLog -LogName '{}' -ErrorAction SilentlyContinue", + self.log_name + ), + ]) + .status(); + } + } + let _cleanup = CleanupGuard { + log_name: "VectorTestResub", + }; + + let data_dir = temp_data_dir(); + + // Emit an initial event into our dedicated channel + write_custom_log_event(log_name, source_name, 1000, 
"pre-clear-event"); + + let (tx, mut rx) = SourceSender::new_test_finalize(EventStatus::Delivered); + let config = WindowsEventLogConfig { + data_dir: Some(data_dir.path().to_path_buf()), + channels: vec![log_name.to_string()], + read_existing_events: true, + batch_size: 100, + event_timeout_ms: 1000, + acknowledgements: SourceAcknowledgementsConfig::from(true), + ..Default::default() + }; + + let cx = SourceContext::new_test(tx, None); + let source = config.build(cx).await.expect("source should build"); + let handle = tokio::spawn(source); + + // Wait for initial events to be consumed + let deadline = tokio::time::Instant::now() + Duration::from_secs(3); + loop { + tokio::select! { + event = rx.next() => { + if event.is_none() { + break; + } + } + _ = tokio::time::sleep_until(deadline) => { + break; + } + } + } + + // Clear our dedicated log — does NOT affect Application or other tests + let clear_result = Command::new("wevtutil").args(["cl", log_name]).status(); + + match clear_result { + Ok(status) if status.success() => { + // Log was cleared. Emit a new event and verify it arrives. + tokio::time::sleep(Duration::from_secs(1)).await; + write_custom_log_event(log_name, source_name, 1000, "post-clear-event"); + + let mut found_post_clear = false; + let deadline = tokio::time::Instant::now() + Duration::from_secs(8); + loop { + tokio::select! { + event = rx.next() => { + if let Some(event) = event { + if let Some(msg) = event.as_log().get("message") { + if msg.to_string_lossy().contains("post-clear-event") { + found_post_clear = true; + break; + } + } + } else { + break; + } + } + _ = tokio::time::sleep_until(deadline) => { + break; + } + } + } + + handle.abort(); + let _ = handle.await; + + assert!( + found_post_clear, + "After log clear, the source should resubscribe and receive new events. \ + The post-clear event was not received — resubscribe_channel may be broken." 
+ ); + } + _ => { + // wevtutil cl failed — skip gracefully + handle.abort(); + let _ = handle.await; + } + } +} + +// --------------------------------------------------------------------------- +// Custom metrics — indirect verification +// --------------------------------------------------------------------------- + +/// Verify that reading events produces the expected checkpoint file, +/// proving the full data path (EvtNext -> render -> parse -> emit -> +/// checkpoint) works end-to-end including the metric-instrumented code paths. +/// +/// Note: Direct custom metric assertions (windows_event_log_events_read_total +/// etc.) are not feasible without adding metrics-util/debugging as a +/// dependency. Instead, we verify the observable side effects: events arrive, +/// checkpoint file is written, and compliance metrics pass. +#[tokio::test] +async fn test_full_data_path_produces_checkpoint() { + let data_dir = temp_data_dir(); + + for i in 0..5 { + emit_event( + "VT_fullck", + "INFORMATION", + 1000, + &format!("checkpoint-path-test-{i}"), + ); + } + + let config = test_config("VT_fullck", data_dir.path()); + let events = run_and_assert_source_compliance(config, Duration::from_secs(5), &[]).await; + + assert!( + events.len() >= 5, + "Expected at least 5 events, got {}.", + events.len() + ); + + // Wait for checkpoint flush + tokio::time::sleep(Duration::from_secs(1)).await; + + // Note: SourceContext::new_test uses ComponentKey "default", and + // resolve_and_make_data_subdir appends the component ID as a subdirectory. + let checkpoint_path = data_dir + .path() + .join("default") + .join("windows_event_log_checkpoints.json"); + assert!( + checkpoint_path.exists(), + "Checkpoint file should be written after successful event processing. \ + This proves the full path: EvtNext -> render -> parse -> emit -> checkpoint." 
+    );
+
+    // Verify checkpoint file is valid JSON with expected structure
+    let contents = fs::read_to_string(&checkpoint_path)
+        .await
+        .expect("should read checkpoint file");
+    assert!(
+        contents.contains("\"version\"") && contents.contains("\"channels\""),
+        "Checkpoint file should contain valid JSON with version and channels. \
+         Got: {}",
+        &contents[..contents.len().min(200)]
+    );
+}
+
+// ---------------------------------------------------------------------------
+// Checkpoint resume: no duplicate record IDs across runs
+// ---------------------------------------------------------------------------
+
+/// Run the source twice with the same data_dir, emitting distinct events
+/// before each run. Assert that the record_id sets from run 1 and run 2
+/// do not overlap, proving the bookmark/checkpoint correctly prevents
+/// re-delivery.
+#[tokio::test]
+async fn test_checkpoint_resume_no_duplicate_record_ids() {
+    let data_dir = temp_data_dir();
+
+    // Phase 1: emit events and collect record IDs
+    for i in 0..5 {
+        emit_event(
+            "VT_ckptdup",
+            "INFORMATION",
+            1000,
+            &format!("ckpt-dup-test-phase1-{i}"),
+        );
+    }
+
+    let config = test_config("VT_ckptdup", data_dir.path());
+    let first_run = run_and_assert_source_compliance(config, Duration::from_secs(5), &[]).await;
+    assert!(
+        !first_run.is_empty(),
+        "Phase 1 produced 0 events. Cannot test checkpoint resume."
+    );
+
+    let first_ids: HashSet<String> = first_run
+        .iter()
+        .filter_map(|e| {
+            e.as_log()
+                .get("record_id")
+                .map(|v| v.to_string_lossy().into_owned())
+        })
+        .collect();
+    assert!(
+        !first_ids.is_empty(),
+        "Phase 1 events have no record_id field. Cannot verify uniqueness."
+    );
+
+    // Let checkpoint flush to disk
+    tokio::time::sleep(Duration::from_secs(1)).await;
+
+    // Phase 2: emit new events, reuse same data_dir
+    for i in 0..5 {
+        emit_event(
+            "VT_ckptdup",
+            "INFORMATION",
+            1000,
+            &format!("ckpt-dup-test-phase2-{i}"),
+        );
+    }
+
+    let config = test_config("VT_ckptdup", data_dir.path());
+    let second_run = run_and_assert_source_compliance(config, Duration::from_secs(5), &[]).await;
+
+    let second_ids: HashSet<String> = second_run
+        .iter()
+        .filter_map(|e| {
+            e.as_log()
+                .get("record_id")
+                .map(|v| v.to_string_lossy().into_owned())
+        })
+        .collect();
+
+    // Allow a small overlap: the test harness uses a timeout-based shutdown that
+    // can fire between send_batch (events collected) and finalize (checkpoint
+    // written). On multi-core runners, the last in-flight batch may be sent but
+    // not checkpointed, causing re-delivery of up to batch_size events.
+    // The important invariant is that the checkpoint prevents FULL re-delivery.
+    let batch_size = 100; // matches test_config
+    let overlap: HashSet<_> = first_ids.intersection(&second_ids).collect();
+    assert!(
+        overlap.len() <= batch_size,
+        "Found {} duplicate record_ids between run 1 and run 2 (max allowed: {}): {:?}. \
+         Bookmark checkpoint is not preventing re-delivery. \
+         Run 1 had {} IDs, run 2 had {} IDs.",
+        overlap.len(),
+        batch_size,
+        overlap,
+        first_ids.len(),
+        second_ids.len()
+    );
+
+    // But we should still see meaningful checkpoint progress — run 2 must not
+    // re-deliver the entire run 1 set.
+    if !first_ids.is_empty() {
+        assert!(
+            second_ids.len() < first_ids.len() + 10,
+            "Run 2 returned {} events vs run 1's {} — checkpoint may not be advancing at all.",
+            second_ids.len(),
+            first_ids.len()
+        );
+    }
+}
diff --git a/src/sources/windows_event_log/metadata.rs b/src/sources/windows_event_log/metadata.rs
new file mode 100644
index 0000000000000..26fc4b244cb97
--- /dev/null
+++ b/src/sources/windows_event_log/metadata.rs
@@ -0,0 +1,234 @@
+use std::collections::HashMap;
+use std::num::NonZeroUsize;
+
+use lru::LruCache;
+use metrics::Counter;
+use windows::Win32::System::EventLog::{
+    EVT_HANDLE, EvtFormatMessage, EvtFormatMessageEvent, EvtFormatMessageKeyword,
+    EvtFormatMessageOpcode, EvtFormatMessageTask, EvtOpenPublisherMetadata,
+};
+use windows::core::HSTRING;
+
+use super::subscription::{FORMAT_CACHE_CAPACITY, PublisherHandle};
+
+/// Resolves task, opcode, and keyword names from provider metadata via EvtFormatMessage.
+///
+/// Returns `(task_name, opcode_name, keyword_names)`. Any field that cannot be
+/// formatted (no publisher metadata, empty result) comes back as `None` / empty.
+pub fn resolve_event_metadata(
+    publisher_cache: &mut LruCache<String, PublisherHandle>,
+    format_cache: &mut HashMap<String, LruCache<(u32, u64), Option<String>>>,
+    cache_hits_counter: &Counter,
+    cache_misses_counter: &Counter,
+    event_handle: EVT_HANDLE,
+    provider_name: &str,
+    task: u64,
+    opcode: u64,
+    keywords: u64,
+) -> (Option<String>, Option<String>, Vec<String>) {
+    let raw_handle = get_or_open_publisher(publisher_cache, provider_name);
+
+    // 0 means the publisher metadata could not be opened — nothing to resolve.
+    if raw_handle == 0 {
+        return (None, None, Vec::new());
+    }
+
+    let metadata_handle = EVT_HANDLE(raw_handle);
+
+    let task_flag = EvtFormatMessageTask.0 as u32;
+    let opcode_flag = EvtFormatMessageOpcode.0 as u32;
+    let keyword_flag = EvtFormatMessageKeyword.0 as u32;
+
+    let task_name = cached_format(
+        format_cache,
+        cache_hits_counter,
+        cache_misses_counter,
+        metadata_handle,
+        event_handle,
+        provider_name,
+        task_flag,
+        task,
+    );
+    let opcode_name = cached_format(
+        format_cache,
+        cache_hits_counter,
+        cache_misses_counter,
+        metadata_handle,
+        event_handle,
+        provider_name,
+        opcode_flag,
+        opcode,
+    );
+    let keyword_str = cached_format(
+        format_cache,
+        cache_hits_counter,
+        cache_misses_counter,
+        metadata_handle,
+        event_handle,
+        provider_name,
+        keyword_flag,
+        keywords,
+    );
+
+    // Split a semicolon-separated keyword string into individual names.
+    // NOTE(review): per the EvtFormatMessage docs, the Keyword flag may return
+    // multiple NUL-separated strings; format_metadata_field truncates at the
+    // first NUL, which could drop all but the first keyword — confirm.
+    let keyword_names = keyword_str
+        .map(|s| {
+            s.split(';')
+                .map(|k| k.trim().to_string())
+                .filter(|k| !k.is_empty())
+                .collect()
+        })
+        .unwrap_or_default();
+
+    (task_name, opcode_name, keyword_names)
+}
+
+/// Returns a raw publisher-metadata handle for `provider_name`, opening and
+/// caching it on first use. A failed open is cached as 0 (negative caching),
+/// so the provider will not be retried until evicted from the LRU.
+fn get_or_open_publisher(
+    cache: &mut LruCache<String, PublisherHandle>,
+    provider_name: &str,
+) -> isize {
+    if let Some(handle) = cache.get(provider_name) {
+        return handle.0;
+    }
+
+    let provider_hstring = HSTRING::from(provider_name);
+    let raw = unsafe {
+        EvtOpenPublisherMetadata(None, &provider_hstring, None, 0, 0)
+            .map(|h| h.0)
+            .unwrap_or(0)
+    };
+
+    cache.put(provider_name.to_string(), PublisherHandle(raw));
+    raw
+}
+
+/// Two-level cache lookup: outer HashMap keyed by `&str` (zero allocation),
+/// inner LRU keyed by `(flag, field_value)`.
+fn cached_format(
+    cache: &mut HashMap<String, LruCache<(u32, u64), Option<String>>>,
+    cache_hits_counter: &Counter,
+    cache_misses_counter: &Counter,
+    metadata_handle: EVT_HANDLE,
+    event_handle: EVT_HANDLE,
+    provider: &str,
+    flag: u32,
+    field_value: u64,
+) -> Option<String> {
+    let inner_key = (flag, field_value);
+
+    // Fast path: borrowed &str lookup on outer HashMap — zero allocation.
+    // peek() intentionally skips LRU promotion — get() requires &mut which
+    // would need get_mut() on the outer HashMap. The put() on every miss
+    // already handles insertion/promotion, so peek is correct here.
+    if let Some(inner) = cache.get(provider) {
+        if let Some(cached) = inner.peek(&inner_key) {
+            cache_hits_counter.increment(1);
+            return cached.clone();
+        }
+    }
+
+    // Slow path: call API and populate cache. Negative results (None) are
+    // cached too, so unformattable fields don't re-hit the API every event.
+    cache_misses_counter.increment(1);
+    let result = format_metadata_field(metadata_handle, event_handle, flag);
+    let inner = cache
+        .entry(provider.to_string())
+        .or_insert_with(|| LruCache::new(NonZeroUsize::new(FORMAT_CACHE_CAPACITY).unwrap()));
+    inner.put(inner_key, result.clone());
+    result
+}
+
+/// Formats one metadata field of `event_handle` via EvtFormatMessage.
+///
+/// First call (no buffer) probes the required size; second call fills the
+/// buffer. Results over 4096 UTF-16 units are treated as absent. The string
+/// is truncated at the first NUL.
+fn format_metadata_field(
+    metadata_handle: EVT_HANDLE,
+    event_handle: EVT_HANDLE,
+    flags: u32,
+) -> Option<String> {
+    let mut buffer_used: u32 = 0;
+    let _ = unsafe {
+        EvtFormatMessage(
+            metadata_handle,
+            event_handle,
+            0,
+            None,
+            flags,
+            None,
+            &mut buffer_used,
+        )
+    };
+
+    if buffer_used == 0 || buffer_used > 4096 {
+        return None;
+    }
+
+    let mut buffer = vec![0u16; buffer_used as usize];
+    let mut actual_used: u32 = 0;
+    let result = unsafe {
+        EvtFormatMessage(
+            metadata_handle,
+            event_handle,
+            0,
+            None,
+            flags,
+            Some(&mut buffer),
+            &mut actual_used,
+        )
+    };
+
+    if result.is_err() {
+        return None;
+    }
+
+    let len = buffer.iter().position(|&c| c == 0).unwrap_or(buffer.len());
+    let s = String::from_utf16_lossy(&buffer[..len]);
+    if s.is_empty() { None } else { Some(s) }
+}
+
+/// Renders a human-readable event message using the Windows EvtFormatMessage API.
+pub fn format_event_message(
+    publisher_cache: &mut LruCache<String, PublisherHandle>,
+    event_handle: EVT_HANDLE,
+    provider_name: &str,
+) -> Option<String> {
+    let raw_handle = get_or_open_publisher(publisher_cache, provider_name);
+
+    // 0 means publisher metadata could not be opened — no message template available.
+    if raw_handle == 0 {
+        return None;
+    }
+
+    let metadata_handle = EVT_HANDLE(raw_handle);
+    let flags = EvtFormatMessageEvent.0 as u32;
+    // Cap rendered messages at 64 KiB of UTF-16 units.
+    let max_size = 64 * 1024;
+
+    // First call probes the required buffer size.
+    let mut buffer_used: u32 = 0;
+    let _ = unsafe {
+        EvtFormatMessage(
+            metadata_handle,
+            event_handle,
+            0,
+            None,
+            flags,
+            None,
+            &mut buffer_used,
+        )
+    };
+
+    if buffer_used == 0 || buffer_used as usize > max_size {
+        return None;
+    }
+
+    let mut buffer = vec![0u16; buffer_used as usize];
+    let mut actual_used: u32 = 0;
+    let result = unsafe {
+        EvtFormatMessage(
+            metadata_handle,
+            event_handle,
+            0,
+            None,
+            flags,
+            Some(&mut buffer),
+            &mut actual_used,
+        )
+    };
+
+    if result.is_err() {
+        return None;
+    }
+
+    // Truncate at the first NUL and convert lossily from UTF-16.
+    let len = buffer.iter().position(|&c| c == 0).unwrap_or(buffer.len());
+    let s = String::from_utf16_lossy(&buffer[..len]);
+    if s.is_empty() { None } else { Some(s) }
+}
diff --git a/src/sources/windows_event_log/mod.rs b/src/sources/windows_event_log/mod.rs
new file mode 100644
index 0000000000000..17d900cc07622
--- /dev/null
+++ b/src/sources/windows_event_log/mod.rs
@@ -0,0 +1,593 @@
+use async_trait::async_trait;
+use vector_lib::config::LogNamespace;
+use vrl::value::{Kind, kind::Collection};
+
+use vector_config::component::SourceDescription;
+
+use crate::config::{DataType, SourceConfig, SourceContext, SourceOutput};
+
+// Cross-platform: config types (pure serde structs, no Windows dependencies)
+mod config;
+pub use self::config::*;
+
+cfg_if::cfg_if!
{ + if #[cfg(windows)] { + mod bookmark; + mod checkpoint; + pub mod error; + mod metadata; + mod parser; + mod render; + mod sid_resolver; + mod subscription; + mod xml_parser; + + use std::path::PathBuf; + use std::sync::Arc; + + use futures::StreamExt; + use vector_lib::EstimatedJsonEncodedSizeOf; + use vector_lib::finalizer::OrderedFinalizer; + use vector_lib::internal_event::{ + ByteSize, BytesReceived, CountByteSize, InternalEventHandle as _, Protocol, + }; + use windows::Win32::Foundation::{DUPLICATE_SAME_ACCESS, DuplicateHandle, HANDLE}; + use windows::Win32::System::Threading::GetCurrentProcess; + + use crate::{ + SourceSender, + event::{BatchNotifier, BatchStatus, BatchStatusReceiver}, + internal_events::{ + EventsReceived, StreamClosedError, WindowsEventLogParseError, WindowsEventLogQueryError, + }, + shutdown::ShutdownSignal, + }; + + use self::{ + checkpoint::Checkpointer, + error::WindowsEventLogError, + parser::EventLogParser, + subscription::{EventLogSubscription, WaitResult}, + }; + } +} + +#[cfg(all(test, windows))] +mod tests; + +// Integration tests are feature-gated to avoid requiring Windows Event Log service. +// To run integration tests on Windows: cargo test --features sources-windows_event_log-integration-tests +#[cfg(all(test, windows, feature = "sources-windows_event_log-integration-tests"))] +mod integration_tests; + +cfg_if::cfg_if! { +if #[cfg(windows)] { + +/// Entry for the acknowledgment finalizer containing checkpoint information. +/// Each entry represents a batch of events that need to be acknowledged before +/// the checkpoint can be safely updated. Contains all channel bookmarks from +/// the batch since a single batch may span multiple channels. 
+#[derive(Debug, Clone)]
+struct FinalizerEntry {
+    /// Channel bookmarks: (channel_name, bookmark_xml) pairs
+    bookmarks: Vec<(String, String)>,
+}
+
+/// Shared checkpointer type for use with the finalizer
+type SharedCheckpointer = Arc<Checkpointer>;
+
+/// Finalizer for handling acknowledgments.
+/// Supports both synchronous (immediate checkpoint) and asynchronous (deferred checkpoint) modes.
+enum Finalizer {
+    /// Synchronous mode: checkpoints are updated immediately after reading events.
+    /// Used when acknowledgements are disabled.
+    Sync(SharedCheckpointer),
+    /// Asynchronous mode: checkpoints are updated only after downstream sinks acknowledge receipt.
+    /// Used when acknowledgements are enabled.
+    Async(OrderedFinalizer<FinalizerEntry>),
+}
+
+impl Finalizer {
+    /// Create a new finalizer based on acknowledgement configuration.
+    fn new(
+        acknowledgements: bool,
+        checkpointer: SharedCheckpointer,
+        shutdown: ShutdownSignal,
+    ) -> Self {
+        if acknowledgements {
+            let (finalizer, mut ack_stream) =
+                OrderedFinalizer::<FinalizerEntry>::new(Some(shutdown.clone()));
+
+            // Spawn background task to process acknowledgments and update checkpoints.
+            // Only Delivered advances the checkpoint; any other status leaves it
+            // untouched so rejected events are re-read on restart.
+            tokio::spawn(async move {
+                while let Some((status, entry)) = ack_stream.next().await {
+                    if status == BatchStatus::Delivered {
+                        if let Err(e) = checkpointer.set_batch(entry.bookmarks.clone()).await {
+                            warn!(
+                                message = "Failed to update checkpoint after acknowledgement.",
+                                error = %e
+                            );
+                        } else {
+                            debug!(
+                                message = "Checkpoint updated after acknowledgement.",
+                                channels = entry.bookmarks.len()
+                            );
+                        }
+                    } else {
+                        debug!(
+                            message = "Events not delivered, checkpoint not updated.",
+                            status = ?status
+                        );
+                    }
+                }
+                debug!(message = "Acknowledgement stream completed.");
+            });
+
+            Self::Async(finalizer)
+        } else {
+            Self::Sync(checkpointer)
+        }
+    }
+
+    /// Finalize a batch of events.
+    /// In sync mode, immediately updates the checkpoint.
+    /// In async mode, registers the entry for deferred checkpoint update.
+    async fn finalize(&self, entry: FinalizerEntry, receiver: Option<BatchStatusReceiver>) {
+        match (self, receiver) {
+            (Self::Sync(checkpointer), None) => {
+                if let Err(e) = checkpointer.set_batch(entry.bookmarks.clone()).await {
+                    warn!(
+                        message = "Failed to update checkpoint.",
+                        error = %e
+                    );
+                }
+            }
+            (Self::Async(finalizer), Some(receiver)) => {
+                finalizer.add(entry, receiver);
+            }
+            // Mode/receiver mismatches are logged rather than panicking.
+            (Self::Sync(_), Some(_)) => {
+                warn!(message = "Received acknowledgement receiver in sync mode, ignoring.");
+            }
+            (Self::Async(_), None) => {
+                warn!(
+                    message = "No acknowledgement receiver in async mode, checkpoint may be lost."
+                );
+            }
+        }
+    }
+}
+
+/// Windows Event Log source implementation
+pub struct WindowsEventLogSource {
+    config: WindowsEventLogConfig,
+    data_dir: PathBuf,
+    acknowledgements: bool,
+    log_namespace: LogNamespace,
+}
+
+impl WindowsEventLogSource {
+    /// Validates the configuration and constructs the source.
+    pub fn new(
+        config: WindowsEventLogConfig,
+        data_dir: PathBuf,
+        acknowledgements: bool,
+        log_namespace: LogNamespace,
+    ) -> crate::Result<Self> {
+        config.validate()?;
+
+        Ok(Self {
+            config,
+            data_dir,
+            acknowledgements,
+            log_namespace,
+        })
+    }
+
+    async fn run_internal(
+        &mut self,
+        mut out: SourceSender,
+        shutdown: ShutdownSignal,
+    ) -> Result<(), WindowsEventLogError> {
+        let checkpointer = Arc::new(Checkpointer::new(&self.data_dir).await?);
+
+        let finalizer = Finalizer::new(
+            self.acknowledgements,
+            Arc::clone(&checkpointer),
+            shutdown.clone(),
+        );
+
+        let mut subscription = EventLogSubscription::new(
+            &self.config,
+            Arc::clone(&checkpointer),
+            self.acknowledgements,
+        )
+        .await?;
+        let parser = EventLogParser::new(&self.config, self.log_namespace);
+
+        let events_received = register!(EventsReceived);
+        let bytes_received = register!(BytesReceived::from(Protocol::from("windows_event_log")));
+
+        let timeout_ms = self.config.event_timeout_ms as u32;
+        let batch_size = self.config.batch_size as usize;
+        let acknowledgements = self.acknowledgements;
+
+        info!(
+            message = "Starting Windows Event Log
source (pull mode).", + acknowledgements = acknowledgements, + ); + + // Spawn async shutdown watcher that signals the Windows shutdown event + // when the Vector shutdown signal fires. This wakes WaitForMultipleObjects + // while subscription is moved into spawn_blocking. + // + // We duplicate the handle so the watcher owns an independent kernel reference. + // This prevents use-after-close if the subscription panics and drops before + // the watcher fires — the duplicate remains valid until explicitly closed. + let (watcher_handle_raw, watcher_owns_handle): (isize, bool) = { + unsafe { + let src = HANDLE(subscription.shutdown_event_raw()); + let process = GetCurrentProcess(); + let mut dup = HANDLE::default(); + if DuplicateHandle( + process, + src, + process, + &mut dup, + 0, + false, + DUPLICATE_SAME_ACCESS, + ) + .is_ok() + { + (dup.0 as isize, true) + } else { + // Fallback: use the original handle without ownership. + // The watcher will signal but NOT close — EventLogSubscription::drop + // owns the handle and will close it. + warn!( + message = "Failed to duplicate shutdown event handle, falling back to shared handle." 
+ ); + (src.0 as isize, false) + } + } + }; + let shutdown_watcher = shutdown.clone(); + tokio::spawn(async move { + shutdown_watcher.await; + unsafe { + let handle = + windows::Win32::Foundation::HANDLE(watcher_handle_raw as *mut std::ffi::c_void); + let _ = windows::Win32::System::Threading::SetEvent(handle); + if watcher_owns_handle { + let _ = windows::Win32::Foundation::CloseHandle(handle); + } + } + }); + + // Track when we last flushed checkpoints + let mut last_checkpoint = std::time::Instant::now(); + let checkpoint_interval = + std::time::Duration::from_secs(self.config.checkpoint_interval_secs); + + // Exponential backoff on consecutive recoverable errors + let mut error_backoff = std::time::Duration::from_millis(100); + const MAX_ERROR_BACKOFF: std::time::Duration = std::time::Duration::from_secs(5); + + // Health heartbeat: log every ~30s regardless of checkpoint interval + let mut timeout_count: u32 = 0; + let health_interval_timeouts = (30_000 / self.config.event_timeout_ms).max(1) as u32; + + loop { + // Move subscription into blocking thread for WaitForMultipleObjects. + // Ownership transfer ensures no data races between the blocking thread + // and async code. The shutdown watcher uses a raw HANDLE value (just an + // integer) to signal shutdown without needing access to the subscription. 
+ let (returned_sub, wait_result) = tokio::task::spawn_blocking({ + let sub = subscription; + move || { + let result = sub.wait_for_events_blocking(timeout_ms); + (sub, result) + } + }) + .await + .map_err(|e| WindowsEventLogError::ConfigError { + message: format!("Wait task panicked: {e}"), + })?; + + subscription = returned_sub; + + match wait_result { + WaitResult::EventsAvailable => { + // Pull events via spawn_blocking (EvtNext/EvtRender are blocking APIs) + let (returned_sub, events_result) = tokio::task::spawn_blocking({ + let mut sub = subscription; + move || { + let result = sub.pull_events(batch_size); + (sub, result) + } + }) + .await + .map_err(|e| WindowsEventLogError::ConfigError { + message: format!("Pull task panicked: {e}"), + })?; + + subscription = returned_sub; + + // Rate limiting between batches (async-compatible) + if let Some(limiter) = subscription.rate_limiter() { + limiter.until_ready().await; + } + + match events_result { + Ok(events) if events.is_empty() => { + error_backoff = std::time::Duration::from_millis(100); + continue; + } + Ok(events) => { + error_backoff = std::time::Duration::from_millis(100); + debug!( + message = "Pulled Windows Event Log events.", + event_count = events.len() + ); + + let (batch, receiver) = + BatchNotifier::maybe_new_with_receiver(acknowledgements); + + let mut log_events = Vec::new(); + let mut total_byte_size = 0; + let mut channels_in_batch = std::collections::HashSet::new(); + + for event in events { + let channel = event.channel.clone(); + channels_in_batch.insert(channel.clone()); + let event_id = event.event_id; + match parser.parse_event(event) { + Ok(mut log_event) => { + let byte_size = log_event.estimated_json_encoded_size_of(); + total_byte_size += byte_size.get(); + + if let Some(ref batch) = batch { + log_event = log_event.with_batch_notifier(batch); + } + + log_events.push(log_event); + } + Err(e) => { + emit!(WindowsEventLogParseError { + error: e.to_string(), + channel, + event_id: 
Some(event_id), + }); + } + } + } + + if !log_events.is_empty() { + let count = log_events.len(); + events_received.emit(CountByteSize(count, total_byte_size.into())); + bytes_received.emit(ByteSize(total_byte_size)); + + // BACK PRESSURE: block here until the pipeline accepts + // the batch. We don't call EvtNext again until this completes. + if let Err(_error) = out.send_batch(log_events).await { + emit!(StreamClosedError { count }); + break; + } + + // Register checkpoint entry with finalizer + let bookmarks: Vec<(String, String)> = channels_in_batch + .into_iter() + .filter_map(|channel| { + subscription + .get_bookmark_xml(&channel) + .map(|xml| (channel, xml)) + }) + .collect(); + + if !bookmarks.is_empty() { + let entry = FinalizerEntry { bookmarks }; + finalizer.finalize(entry, receiver).await; + } + } + } + Err(e) => { + emit!(WindowsEventLogQueryError { + channel: "all".to_string(), + query: None, + error: e.to_string(), + }); + if !e.is_recoverable() { + error!( + message = "Non-recoverable pull error, shutting down.", + error = %e + ); + break; + } + // Exponential backoff on consecutive recoverable errors + warn!( + message = "Recoverable pull error, backing off.", + backoff_ms = error_backoff.as_millis() as u64, + error = %e + ); + tokio::time::sleep(error_backoff).await; + error_backoff = (error_backoff * 2).min(MAX_ERROR_BACKOFF); + } + } + } + + WaitResult::Timeout => { + // A full wait cycle without errors means the system is healthy; + // reset backoff so the next transient error starts fresh. 
+ error_backoff = std::time::Duration::from_millis(100); + + // Periodic checkpoint flush (sync mode only) + if !acknowledgements && last_checkpoint.elapsed() >= checkpoint_interval { + if let Err(e) = subscription.flush_bookmarks().await { + warn!( + message = "Failed to flush bookmarks during periodic checkpoint.", + error = %e + ); + } + last_checkpoint = std::time::Instant::now(); + } + + // Health heartbeat on a separate ~30s cadence + timeout_count += 1; + if timeout_count >= health_interval_timeouts { + timeout_count = 0; + let (total, active) = subscription.channel_health_summary(); + if active < total { + warn!( + message = "Some channel subscriptions are inactive.", + total_channels = total, + active_channels = active, + ); + } else { + debug!( + message = "All channel subscriptions healthy.", + total_channels = total, + ); + } + } + } + + WaitResult::Shutdown => { + info!(message = "Windows Event Log wait received shutdown signal."); + if !acknowledgements { + info!(message = "Flushing bookmarks before shutdown."); + if let Err(e) = subscription.flush_bookmarks().await { + warn!(message = "Failed to flush bookmarks on shutdown.", error = %e); + } + } + break; + } + } + } + + Ok(()) + } +} + +} // if #[cfg(windows)] +} // cfg_if! 
+ +#[async_trait] +#[typetag::serde(name = "windows_event_log")] +impl SourceConfig for WindowsEventLogConfig { + async fn build(&self, _cx: SourceContext) -> crate::Result { + #[cfg(not(windows))] + { + Err("The windows_event_log source is only supported on Windows.".into()) + } + + #[cfg(windows)] + { + let data_dir = _cx + .globals + .resolve_and_make_data_subdir(self.data_dir.as_ref(), _cx.key.id())?; + + let acknowledgements = _cx.do_acknowledgements(self.acknowledgements); + + let log_namespace = _cx.log_namespace(self.log_namespace); + let source = WindowsEventLogSource::new( + self.clone(), + data_dir, + acknowledgements, + log_namespace, + )?; + Ok(Box::pin(async move { + let mut source = source; + if let Err(error) = source.run_internal(_cx.out, _cx.shutdown).await { + error!(message = "Windows Event Log source failed.", %error); + } + Ok(()) + })) + } + } + + fn outputs(&self, global_log_namespace: LogNamespace) -> Vec { + let log_namespace = self + .log_namespace + .map(|b| { + if b { + LogNamespace::Vector + } else { + LogNamespace::Legacy + } + }) + .unwrap_or(global_log_namespace); + + let schema_definition = match log_namespace { + LogNamespace::Vector => vector_lib::schema::Definition::new_with_default_metadata( + Kind::object(std::collections::BTreeMap::from([ + ("timestamp".into(), Kind::timestamp().or_undefined()), + ("message".into(), Kind::bytes().or_undefined()), + ("level".into(), Kind::bytes().or_undefined()), + ("source".into(), Kind::bytes().or_undefined()), + ("event_id".into(), Kind::integer().or_undefined()), + ("provider_name".into(), Kind::bytes().or_undefined()), + ("computer".into(), Kind::bytes().or_undefined()), + ("user_id".into(), Kind::bytes().or_undefined()), + ("user_name".into(), Kind::bytes().or_undefined()), + ("record_id".into(), Kind::integer().or_undefined()), + ("activity_id".into(), Kind::bytes().or_undefined()), + ("related_activity_id".into(), Kind::bytes().or_undefined()), + ("process_id".into(), 
Kind::integer().or_undefined()), + ("thread_id".into(), Kind::integer().or_undefined()), + ("channel".into(), Kind::bytes().or_undefined()), + ("opcode".into(), Kind::integer().or_undefined()), + ("task".into(), Kind::integer().or_undefined()), + ("keywords".into(), Kind::bytes().or_undefined()), + ("level_value".into(), Kind::integer().or_undefined()), + ("provider_guid".into(), Kind::bytes().or_undefined()), + ("version".into(), Kind::integer().or_undefined()), + ("qualifiers".into(), Kind::integer().or_undefined()), + ( + "string_inserts".into(), + Kind::array(Collection::empty().with_unknown(Kind::bytes())).or_undefined(), + ), + ( + "event_data".into(), + Kind::object(std::collections::BTreeMap::new()).or_undefined(), + ), + ( + "user_data".into(), + Kind::object(std::collections::BTreeMap::new()).or_undefined(), + ), + ("task_name".into(), Kind::bytes().or_undefined()), + ("opcode_name".into(), Kind::bytes().or_undefined()), + ( + "keyword_names".into(), + Kind::array(Collection::empty().with_unknown(Kind::bytes())).or_undefined(), + ), + ])), + [LogNamespace::Vector], + ), + LogNamespace::Legacy => vector_lib::schema::Definition::any(), + }; + + vec![SourceOutput::new_maybe_logs( + DataType::Log, + schema_definition, + )] + } + + fn resources(&self) -> Vec { + self.channels + .iter() + .map(|channel| crate::config::Resource::DiskBuffer(channel.clone())) + .collect() + } + + fn can_acknowledge(&self) -> bool { + true + } +} + +inventory::submit! 
{ + SourceDescription::new::( + "windows_event_log", + "Collect logs from Windows Event Log channels", + "A Windows-specific source that subscribes to Windows Event Log channels and streams events in real-time using the Windows Event Log API.", + "https://vector.dev/docs/reference/configuration/sources/windows_event_log/" + ) +} diff --git a/src/sources/windows_event_log/parser.rs b/src/sources/windows_event_log/parser.rs new file mode 100644 index 0000000000000..f8d17396ebd47 --- /dev/null +++ b/src/sources/windows_event_log/parser.rs @@ -0,0 +1,804 @@ +use vector_lib::config::{LogNamespace, log_schema}; +use vrl::value::{ObjectMap, Value}; + +use vector_lib::event::LogEvent; + +use super::{ + config::{EventDataFormat, WindowsEventLogConfig}, + error::*, + xml_parser::WindowsEvent, +}; + +/// Parser for converting Windows Event Log events to Vector LogEvents +pub struct EventLogParser { + config: WindowsEventLogConfig, + log_namespace: LogNamespace, +} + +impl EventLogParser { + /// Create a new parser with the given configuration and resolved namespace + pub fn new(config: &WindowsEventLogConfig, log_namespace: LogNamespace) -> Self { + Self { + config: config.clone(), + log_namespace, + } + } + + /// Parse a Windows event into a Vector LogEvent + pub fn parse_event(&self, event: WindowsEvent) -> Result { + let mut log_event = LogEvent::default(); + + // Set core fields based on log namespace + match self.log_namespace { + LogNamespace::Vector => { + self.set_vector_namespace_fields(&mut log_event, &event)?; + } + LogNamespace::Legacy => { + self.set_legacy_namespace_fields(&mut log_event, &event)?; + } + } + + // Apply field filtering + self.apply_field_filtering(&mut log_event)?; + + // Apply custom formatting + self.apply_custom_formatting(&mut log_event)?; + + Ok(log_event) + } + + fn set_vector_namespace_fields( + &self, + log_event: &mut LogEvent, + event: &WindowsEvent, + ) -> Result<(), WindowsEventLogError> { + let log_schema = log_schema(); + + // Set 
timestamp + if let Some(timestamp_key) = log_schema.timestamp_key() { + log_event.try_insert( + timestamp_key.to_string().as_str(), + Value::Timestamp(event.time_created), + ); + } + + // Set message (rendered message or event data) + if let Some(message_key) = log_schema.message_key() { + let message = event + .rendered_message + .as_ref() + .cloned() + .unwrap_or_else(|| self.extract_message_from_event_data(event)); + + log_event.try_insert( + message_key.to_string().as_str(), + Value::Bytes(message.into()), + ); + } + + // Set source/host + if let Some(host_key) = log_schema.host_key() { + log_event.try_insert( + host_key.to_string().as_str(), + Value::Bytes(event.computer.clone().into()), + ); + } + + // Set Windows-specific fields + self.set_windows_fields(log_event, event)?; + + Ok(()) + } + + fn set_legacy_namespace_fields( + &self, + log_event: &mut LogEvent, + event: &WindowsEvent, + ) -> Result<(), WindowsEventLogError> { + // Legacy namespace puts everything in the root + let log_schema = log_schema(); + + // Set standard fields + if let Some(timestamp_key) = log_schema.timestamp_key() { + log_event.try_insert( + timestamp_key.to_string().as_str(), + Value::Timestamp(event.time_created), + ); + } + + if let Some(message_key) = log_schema.message_key() { + let message = event + .rendered_message + .as_ref() + .cloned() + .unwrap_or_else(|| self.extract_message_from_event_data(event)); + + log_event.try_insert( + message_key.to_string().as_str(), + Value::Bytes(message.into()), + ); + } + + if let Some(host_key) = log_schema.host_key() { + log_event.try_insert( + host_key.to_string().as_str(), + Value::Bytes(event.computer.clone().into()), + ); + } + + // Set Windows-specific fields at root level + self.set_windows_fields(log_event, event)?; + + Ok(()) + } + + fn set_windows_fields( + &self, + log_event: &mut LogEvent, + event: &WindowsEvent, + ) -> Result<(), WindowsEventLogError> { + // Core Windows Event Log fields + log_event.insert("event_id", 
Value::Integer(event.event_id as i64)); + + log_event.insert("record_id", Value::Integer(event.record_id as i64)); + + log_event.insert("level", Value::Bytes(event.level_name().into())); + + log_event.insert("level_value", Value::Integer(event.level as i64)); + + log_event.insert("channel", Value::Bytes(event.channel.clone().into())); + + log_event.insert( + "provider_name", + Value::Bytes(event.provider_name.clone().into()), + ); + + if let Some(ref provider_guid) = event.provider_guid { + log_event.insert("provider_guid", Value::Bytes(provider_guid.clone().into())); + } + + log_event.insert("computer", Value::Bytes(event.computer.clone().into())); + + if let Some(ref user_id) = event.user_id { + log_event.insert("user_id", Value::Bytes(user_id.clone().into())); + } + + if let Some(ref user_name) = event.user_name { + log_event.insert("user_name", Value::Bytes(user_name.clone().into())); + } + + log_event.insert("process_id", Value::Integer(event.process_id as i64)); + + log_event.insert("thread_id", Value::Integer(event.thread_id as i64)); + + if event.task != 0 { + log_event.insert("task", Value::Integer(event.task as i64)); + + if let Some(ref task_name) = event.task_name { + log_event.insert("task_name", Value::Bytes(task_name.clone().into())); + } + } + + if event.opcode != 0 { + log_event.insert("opcode", Value::Integer(event.opcode as i64)); + + if let Some(ref opcode_name) = event.opcode_name { + log_event.insert("opcode_name", Value::Bytes(opcode_name.clone().into())); + } + } + + if event.keywords != 0 { + log_event.insert( + "keywords", + Value::Bytes(format!("0x{:016X}", event.keywords).into()), + ); + + if !event.keyword_names.is_empty() { + let kw_values: Vec = event + .keyword_names + .iter() + .map(|s| Value::Bytes(s.clone().into())) + .collect(); + log_event.insert("keyword_names", Value::Array(kw_values)); + } + } + + if let Some(ref activity_id) = event.activity_id { + log_event.insert("activity_id", Value::Bytes(activity_id.clone().into())); + 
} + + if let Some(ref related_activity_id) = event.related_activity_id { + log_event.insert( + "related_activity_id", + Value::Bytes(related_activity_id.clone().into()), + ); + } + + // New FluentBit-compatible fields + if let Some(version) = event.version { + log_event.insert("version", Value::Integer(version as i64)); + } + + if let Some(qualifiers) = event.qualifiers { + log_event.insert("qualifiers", Value::Integer(qualifiers as i64)); + } + + // StringInserts field for FluentBit compatibility + if !event.string_inserts.is_empty() { + let string_inserts: Vec = event + .string_inserts + .iter() + .map(|s| Value::Bytes(s.clone().into())) + .collect(); + log_event.insert("string_inserts", Value::Array(string_inserts)); + } + + // Include raw XML if requested + if self.config.include_xml && !event.raw_xml.is_empty() { + log_event.insert("xml", Value::Bytes(event.raw_xml.clone().into())); + } + + // Include event data if configured + if self.config.field_filter.include_event_data && !event.event_data.is_empty() { + let mut event_data_map = ObjectMap::new(); + for (key, value) in &event.event_data { + let typed_value = self.coerce_field_value(key, value); + event_data_map.insert(key.clone().into(), typed_value); + } + log_event.insert("event_data", Value::Object(event_data_map)); + } + + // Include user data if configured + if self.config.field_filter.include_user_data && !event.user_data.is_empty() { + let mut user_data_map = ObjectMap::new(); + for (key, value) in &event.user_data { + let typed_value = self.coerce_field_value(key, value); + user_data_map.insert(key.clone().into(), typed_value); + } + log_event.insert("user_data", Value::Object(user_data_map)); + } + + Ok(()) + } + + /// Convert a string value to a typed Value using explicit format config. + /// + /// Values are kept as strings by default. This prevents silent breakage + /// of downstream SIEM correlation rules that + /// compare event data fields as strings (e.g. `LogonType == "2"`). 
+ /// + /// Use `event_data_format` config entries to opt in to typed coercion + /// for specific fields. + fn coerce_field_value(&self, key: &str, value: &str) -> Value { + let as_bytes = || Value::Bytes(value.to_string().into()); + + if let Some(fmt) = self.config.event_data_format.get(key) { + return match fmt { + EventDataFormat::Integer => value + .parse::() + .map(Value::Integer) + .unwrap_or_else(|_| as_bytes()), + EventDataFormat::Float => value + .parse::() + .ok() + .and_then(|f| ordered_float::NotNan::new(f).ok()) + .map(Value::Float) + .unwrap_or_else(as_bytes), + EventDataFormat::Boolean => { + let lower = value.to_lowercase(); + Value::Boolean(matches!(lower.as_str(), "true" | "1" | "yes" | "on")) + } + EventDataFormat::String | EventDataFormat::Auto => as_bytes(), + }; + } + + as_bytes() + } + + fn extract_message_from_event_data(&self, event: &WindowsEvent) -> String { + // Try to find a message in named event data fields + for (key, value) in &event.event_data { + if key.to_lowercase().contains("message") { + return value.clone(); + } + } + + // Try string inserts (unnamed elements, e.g. 
from eventcreate) + if let Some(first) = event.string_inserts.first() { + if !first.is_empty() { + return first.clone(); + } + } + + // Fall back to generic message + format!( + "Event ID {} from {} on {}", + event.event_id, event.provider_name, event.computer + ) + } + + fn apply_field_filtering(&self, log_event: &mut LogEvent) -> Result<(), WindowsEventLogError> { + let filter = &self.config.field_filter; + + // If include_fields is specified, remove fields not in the list + if let Some(ref include_fields) = filter.include_fields { + // Pre-allocate HashSet with known capacity for better performance + let mut include_set = std::collections::HashSet::with_capacity(include_fields.len()); + for field in include_fields { + include_set.insert(field.as_str()); + } + + // Remove fields not in include set + let keys_to_remove: Vec = log_event + .all_event_fields() + .map(|iter| iter.collect::>()) + .unwrap_or_default() + .into_iter() + .filter_map(|(key, _)| { + if !include_set.contains(key.as_str()) { + Some(key.to_string()) + } else { + None + } + }) + .collect(); + + for key in keys_to_remove { + log_event.remove(key.as_str()); + } + } + + // Remove fields in exclude_fields list - single pass removal + if let Some(ref exclude_fields) = filter.exclude_fields { + for field in exclude_fields { + log_event.remove(field.as_str()); + } + } + + Ok(()) + } + + fn apply_custom_formatting( + &self, + log_event: &mut LogEvent, + ) -> Result<(), WindowsEventLogError> { + for (field_name, format) in &self.config.event_data_format { + if let Some(current_value) = log_event.get(field_name.as_str()) { + let formatted_value = self.format_value(current_value, format)?; + log_event.insert(field_name.as_str(), formatted_value); + } + } + + Ok(()) + } + + fn format_value( + &self, + value: &Value, + format: &EventDataFormat, + ) -> Result { + match format { + EventDataFormat::String => Ok(Value::Bytes(value.to_string().into())), + EventDataFormat::Integer => { + let int_value = match 
value { + Value::Integer(i) => *i, + Value::Float(f) => f.into_inner() as i64, + Value::Bytes(b) => String::from_utf8_lossy(b).parse::().map_err(|_| { + WindowsEventLogError::FilterError { + message: format!( + "Cannot convert '{}' to integer", + String::from_utf8_lossy(b) + ), + } + })?, + _ => { + return Err(WindowsEventLogError::FilterError { + message: format!("Cannot convert {:?} to integer", value), + }); + } + }; + Ok(Value::Integer(int_value)) + } + EventDataFormat::Float => { + let float_value = match value { + Value::Float(f) => f.into_inner(), + Value::Integer(i) => *i as f64, + Value::Bytes(b) => String::from_utf8_lossy(b).parse::().map_err(|_| { + WindowsEventLogError::FilterError { + message: format!( + "Cannot convert '{}' to float", + String::from_utf8_lossy(b) + ), + } + })?, + _ => { + return Err(WindowsEventLogError::FilterError { + message: format!("Cannot convert {:?} to float", value), + }); + } + }; + Ok(Value::Float( + ordered_float::NotNan::new(float_value) + .unwrap_or_else(|_| ordered_float::NotNan::new(0.0).unwrap()), + )) + } + EventDataFormat::Boolean => { + let bool_value = match value { + Value::Boolean(b) => *b, + Value::Integer(i) => *i != 0, + Value::Bytes(b) => { + let s = String::from_utf8_lossy(b).to_lowercase(); + matches!(s.as_str(), "true" | "1" | "yes" | "on") + } + _ => { + return Err(WindowsEventLogError::FilterError { + message: format!("Cannot convert {:?} to boolean", value), + }); + } + }; + Ok(Value::Boolean(bool_value)) + } + EventDataFormat::Auto => { + // Keep the original format + Ok(value.clone()) + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use chrono::Utc; + use std::collections::HashMap; + + /// Creates a generic test event for parser unit tests. + /// Note: tests.rs has a separate create_test_event() with realistic Security audit data. + /// This version uses simple generic values to isolate parser logic testing. 
+ fn create_test_event() -> WindowsEvent { + WindowsEvent { + record_id: 12345, + event_id: 1000, + level: 4, + task: 1, + opcode: 2, + keywords: 0x8000000000000000, + time_created: Utc::now(), + provider_name: "TestProvider".to_string(), + provider_guid: Some("{12345678-1234-1234-1234-123456789012}".to_string()), + channel: "TestChannel".to_string(), + computer: "TEST-PC".to_string(), + user_id: Some("S-1-5-21-1234567890-1234567890-1234567890-1000".to_string()), + process_id: 1234, + thread_id: 5678, + activity_id: Some("{ABCDEFGH-1234-1234-1234-123456789012}".to_string()), + related_activity_id: None, + raw_xml: "1000".to_string(), + rendered_message: Some("Test message".to_string()), + event_data: { + let mut map = HashMap::new(); + map.insert("key1".to_string(), "value1".to_string()); + map.insert("key2".to_string(), "value2".to_string()); + map + }, + user_data: HashMap::new(), + task_name: None, + opcode_name: Some("Stop".to_string()), + keyword_names: vec!["Classic".to_string()], + user_name: None, + version: Some(1), + qualifiers: Some(0), + string_inserts: vec!["value1".to_string(), "value2".to_string()], + } + } + + #[test] + fn test_parser_uses_provided_namespace() { + let config = WindowsEventLogConfig::default(); + let parser = EventLogParser::new(&config, LogNamespace::Vector); + + assert!(matches!(parser.log_namespace, LogNamespace::Vector)); + } + + #[test] + fn test_parse_event_basic() { + let config = WindowsEventLogConfig::default(); + let parser = EventLogParser::new(&config, LogNamespace::Legacy); + let event = create_test_event(); + + let log_event = parser.parse_event(event.clone()).unwrap(); + + // Check core fields + assert_eq!(log_event.get("event_id").unwrap(), &Value::Integer(1000)); + assert_eq!(log_event.get("record_id").unwrap(), &Value::Integer(12345)); + assert_eq!( + log_event.get("level").unwrap(), + &Value::Bytes("Information".into()) + ); + assert_eq!( + log_event.get("channel").unwrap(), + &Value::Bytes("TestChannel".into()) + 
); + assert_eq!( + log_event.get("provider_name").unwrap(), + &Value::Bytes("TestProvider".into()) + ); + assert_eq!( + log_event.get("computer").unwrap(), + &Value::Bytes("TEST-PC".into()) + ); + + // Enriched fields from the new resolution methods + // opcode=2 -> "Stop" + assert_eq!( + log_event.get("opcode_name").unwrap(), + &Value::Bytes("Stop".into()) + ); + // keywords=0x8000000000000000 -> ["Classic"] + assert_eq!( + log_event.get("keyword_names").unwrap(), + &Value::Array(vec![Value::Bytes("Classic".into())]) + ); + // task=1 with provider "TestProvider" has no known mapping + assert!(log_event.get("task_name").is_none()); + } + + #[test] + fn test_parse_security_audit_event() { + let config = WindowsEventLogConfig::default(); + let parser = EventLogParser::new(&config, LogNamespace::Legacy); + + let event = WindowsEvent { + record_id: 99999, + event_id: 4624, + level: 0, // Security audit events use level 0 + task: 12544, + opcode: 0, + keywords: 0x0020000000000000, // Audit Success + time_created: Utc::now(), + provider_name: "Microsoft-Windows-Security-Auditing".to_string(), + provider_guid: Some("{54849625-5478-4994-A5BA-3E3B0328C30D}".to_string()), + channel: "Security".to_string(), + computer: "DC01.corp.local".to_string(), + user_id: Some("S-1-5-18".to_string()), + process_id: 636, + thread_id: 1234, + activity_id: None, + related_activity_id: None, + raw_xml: String::new(), + rendered_message: Some("An account was successfully logged on.".to_string()), + event_data: HashMap::new(), + user_data: HashMap::new(), + task_name: Some("Logon".to_string()), + opcode_name: None, + keyword_names: vec!["Audit Success".to_string()], + user_name: None, + version: Some(2), + qualifiers: None, + string_inserts: vec![], + }; + + let log_event = parser.parse_event(event).unwrap(); + + // Level 0 should map to "Information" (not "Unknown") + assert_eq!( + log_event.get("level").unwrap(), + &Value::Bytes("Information".into()) + ); + 
assert_eq!(log_event.get("level_value").unwrap(), &Value::Integer(0)); + + // Task 12544 -> "Logon" + assert_eq!( + log_event.get("task_name").unwrap(), + &Value::Bytes("Logon".into()) + ); + + // keywords=Audit Success + assert_eq!( + log_event.get("keyword_names").unwrap(), + &Value::Array(vec![Value::Bytes("Audit Success".into())]) + ); + + // opcode=0 is not emitted since the condition is `if event.opcode != 0` + assert!(log_event.get("opcode").is_none()); + assert!(log_event.get("opcode_name").is_none()); + } + + #[test] + fn test_parse_event_with_xml() { + let mut config = WindowsEventLogConfig::default(); + config.include_xml = true; + + let parser = EventLogParser::new(&config, LogNamespace::Legacy); + let event = create_test_event(); + + let log_event = parser.parse_event(event.clone()).unwrap(); + + assert!(log_event.get("xml").is_some()); + assert_eq!( + log_event.get("xml").unwrap(), + &Value::Bytes(event.raw_xml.into()) + ); + } + + #[test] + fn test_parse_event_data_filtering() { + let mut config = WindowsEventLogConfig::default(); + config.field_filter.include_event_data = true; + + let parser = EventLogParser::new(&config, LogNamespace::Legacy); + let event = create_test_event(); + + let log_event = parser.parse_event(event.clone()).unwrap(); + + if let Some(Value::Object(event_data)) = log_event.get("event_data") { + assert_eq!(event_data.get("key1"), Some(&Value::Bytes("value1".into()))); + assert_eq!(event_data.get("key2"), Some(&Value::Bytes("value2".into()))); + } else { + panic!("event_data should be present"); + } + } + + #[test] + fn test_custom_formatting() { + let mut config = WindowsEventLogConfig::default(); + config + .event_data_format + .insert("event_id".to_string(), EventDataFormat::String); + + let parser = EventLogParser::new(&config, LogNamespace::Legacy); + let event = create_test_event(); + + let log_event = parser.parse_event(event).unwrap(); + + // event_id should be converted to string + assert_eq!( + 
log_event.get("event_id").unwrap(), + &Value::Bytes("1000".into()) + ); + } + + #[test] + fn test_field_include_filtering() { + let mut config = WindowsEventLogConfig::default(); + config.field_filter.include_fields = + Some(vec!["event_id".to_string(), "level".to_string()]); + + let parser = EventLogParser::new(&config, LogNamespace::Legacy); + let event = create_test_event(); + + let log_event = parser.parse_event(event).unwrap(); + + // Only included fields should be present + assert!(log_event.get("event_id").is_some()); + assert!(log_event.get("level").is_some()); + // Other fields should be filtered out + // Note: This test might need adjustment based on actual field filtering implementation + } + + #[test] + fn test_field_exclude_filtering() { + let mut config = WindowsEventLogConfig::default(); + config.field_filter.exclude_fields = + Some(vec!["raw_xml".to_string(), "provider_guid".to_string()]); + + let parser = EventLogParser::new(&config, LogNamespace::Legacy); + let event = create_test_event(); + + let log_event = parser.parse_event(event).unwrap(); + + // Excluded fields should not be present + assert!(log_event.get("raw_xml").is_none()); + assert!(log_event.get("provider_guid").is_none()); + // Other fields should still be there + assert!(log_event.get("event_id").is_some()); + } + + #[test] + fn test_extract_message_from_event_data() { + let config = WindowsEventLogConfig::default(); + let parser = EventLogParser::new(&config, LogNamespace::Legacy); + + let mut event = create_test_event(); + event.rendered_message = None; + event + .event_data + .insert("message".to_string(), "Custom message".to_string()); + + let message = parser.extract_message_from_event_data(&event); + assert_eq!(message, "Custom message"); + } + + #[test] + fn test_format_value_conversions() { + let config = WindowsEventLogConfig::default(); + let parser = EventLogParser::new(&config, LogNamespace::Legacy); + + // Test string conversion + let value = Value::Integer(123); + let 
result = parser + .format_value(&value, &EventDataFormat::String) + .unwrap(); + assert_eq!(result, Value::Bytes("123".into())); + + // Test integer conversion + let value = Value::Bytes("456".into()); + let result = parser + .format_value(&value, &EventDataFormat::Integer) + .unwrap(); + assert_eq!(result, Value::Integer(456)); + + // Test float conversion + let value = Value::Bytes("123.45".into()); + let result = parser + .format_value(&value, &EventDataFormat::Float) + .unwrap(); + if let Value::Float(f) = result { + assert!((f.into_inner() - 123.45).abs() < f64::EPSILON); + } else { + panic!("Expected float value"); + } + + // Test boolean conversion + let value = Value::Bytes("true".into()); + let result = parser + .format_value(&value, &EventDataFormat::Boolean) + .unwrap(); + assert_eq!(result, Value::Boolean(true)); + + // Test auto format (no change) + let value = Value::Integer(789); + let result = parser.format_value(&value, &EventDataFormat::Auto).unwrap(); + assert_eq!(result, Value::Integer(789)); + } + + #[test] + fn test_include_and_exclude_fields_interaction() { + let mut config = WindowsEventLogConfig::default(); + // Include a set of fields, then exclude one from that set + config.field_filter.include_fields = Some(vec![ + "event_id".to_string(), + "level".to_string(), + "channel".to_string(), + ]); + config.field_filter.exclude_fields = Some(vec!["channel".to_string()]); + + let parser = EventLogParser::new(&config, LogNamespace::Legacy); + let event = create_test_event(); + let log_event = parser.parse_event(event).unwrap(); + + // event_id and level should be present (in include list, not in exclude list) + assert!(log_event.get("event_id").is_some()); + assert!(log_event.get("level").is_some()); + // channel should be excluded (in both include and exclude, exclude wins) + assert!(log_event.get("channel").is_none()); + // Fields not in include list should be absent + assert!(log_event.get("computer").is_none()); + 
assert!(log_event.get("record_id").is_none()); + } + + #[test] + fn test_apply_custom_formatting_error_on_invalid_conversion() { + let mut config = WindowsEventLogConfig::default(); + // Configure event_id to be formatted as integer — it already is an integer, + // so this succeeds. Instead, configure a string field to be converted to integer. + config + .event_data_format + .insert("level".to_string(), EventDataFormat::Integer); + + let parser = EventLogParser::new(&config, LogNamespace::Legacy); + let event = create_test_event(); + + // level is stored as a string like "Information" — converting to integer should fail + let result = parser.parse_event(event); + assert!( + result.is_err(), + "Should fail when converting non-numeric string to integer" + ); + let err = result.unwrap_err(); + assert!( + err.to_string().contains("Cannot convert"), + "Error should describe the conversion failure, got: {err}" + ); + } +} diff --git a/src/sources/windows_event_log/render.rs b/src/sources/windows_event_log/render.rs new file mode 100644 index 0000000000000..24b5d16210a8f --- /dev/null +++ b/src/sources/windows_event_log/render.rs @@ -0,0 +1,166 @@ +//! Event rendering and channel statistics helpers for Windows Event Log. +//! +//! Extracted from `subscription.rs` to keep that module focused on +//! subscription lifecycle and event pulling. + +use metrics::Gauge; +use windows::Win32::Foundation::ERROR_INSUFFICIENT_BUFFER; +use windows::Win32::System::EventLog::{ + EVT_HANDLE, EVT_LOG_PROPERTY_ID, EvtClose, EvtGetLogInfo, EvtLogNumberOfLogRecords, EvtOpenLog, + EvtRender, EvtRenderEventXml, +}; +use windows::core::HSTRING; + +use super::error::WindowsEventLogError; + +/// Render an event handle to XML using reusable buffers. 
+pub(super) fn render_event_xml( + render_buffer: &mut Vec, + decode_buffer: &mut Vec, + event_handle: EVT_HANDLE, +) -> Result { + const MAX_BUFFER_SIZE: u32 = 10 * 1024 * 1024; // 10MB limit + + let buffer_size = render_buffer.len() as u32; + let mut buffer_used = 0u32; + let mut property_count = 0u32; + + let result = unsafe { + EvtRender( + None, + event_handle, + EvtRenderEventXml.0, + buffer_size, + Some(render_buffer.as_mut_ptr() as *mut std::ffi::c_void), + &mut buffer_used, + &mut property_count, + ) + }; + + if let Err(e) = result { + if e.code() == ERROR_INSUFFICIENT_BUFFER.into() { + if buffer_used == 0 { + return Ok(String::new()); + } + if buffer_used > MAX_BUFFER_SIZE { + return Err(WindowsEventLogError::ReadEventError { source: e }); + } + + // Grow the reusable buffer + render_buffer.resize(buffer_used as usize, 0); + let mut second_buffer_used = 0u32; + let mut second_property_count = 0u32; + + unsafe { + EvtRender( + None, + event_handle, + EvtRenderEventXml.0, + buffer_used, + Some(render_buffer.as_mut_ptr() as *mut std::ffi::c_void), + &mut second_buffer_used, + &mut second_property_count, + ) + } + .map_err(|e2| WindowsEventLogError::ReadEventError { source: e2 })?; + + let result = decode_utf16_buffer(render_buffer, second_buffer_used, decode_buffer); + + // Shrink if buffer grew very large (match normal-path threshold) + const SHRINK_THRESHOLD: usize = 64 * 1024; + if render_buffer.len() > SHRINK_THRESHOLD { + render_buffer.resize(SHRINK_THRESHOLD, 0); + render_buffer.shrink_to_fit(); + } + + return Ok(result); + } + return Err(WindowsEventLogError::ReadEventError { source: e }); + } + + let result = decode_utf16_buffer(render_buffer, buffer_used, decode_buffer); + + // Shrink the buffer back down if a large event caused it to grow. + // 64 KB covers the vast majority of events without repeated reallocation. 
+ const SHRINK_THRESHOLD: usize = 64 * 1024; + if render_buffer.len() > SHRINK_THRESHOLD { + render_buffer.resize(SHRINK_THRESHOLD, 0); + render_buffer.shrink_to_fit(); + } + + Ok(result) +} + +/// Update the channel record count gauge using EvtGetLogInfo. +/// +/// Reports total records in the channel. SOC teams compare this against +/// `rate(events_read_total)` to detect ingestion lag. +/// Best-effort: if any API call fails, the gauge is left unchanged. +pub(super) fn update_channel_records(channel: &str, gauge: &Gauge) { + let channel_hstring = HSTRING::from(channel); + let log_handle = unsafe { + // EvtOpenChannelPath = 1 + match EvtOpenLog(None, &channel_hstring, 1) { + Ok(h) => h, + Err(_) => return, + } + }; + + // EVT_VARIANT is 16 bytes: 8 bytes value + 4 bytes count + 4 bytes type + let mut buffer = [0u8; 16]; + let mut buffer_used = 0u32; + + let result = unsafe { + EvtGetLogInfo( + log_handle, + EVT_LOG_PROPERTY_ID(EvtLogNumberOfLogRecords.0), + buffer.len() as u32, + Some(buffer.as_mut_ptr() as *mut _), + &mut buffer_used, + ) + }; + + unsafe { + let _ = EvtClose(log_handle); + } + + if result.is_ok() { + // EVT_VARIANT for UInt64: first 8 bytes are the value (little-endian) + let record_count = u64::from_le_bytes(buffer[..8].try_into().unwrap_or([0; 8])); + gauge.set(record_count as f64); + } +} + +/// Decode a UTF-16LE buffer (as returned by Windows EvtRender) into a String. +/// +/// Uses a reusable `Vec` decode buffer to avoid per-event heap allocations. +/// Copies byte pairs into the properly-aligned buffer instead of casting the +/// pointer, which would be undefined behavior when the source buffer is not +/// 2-byte aligned. 
+fn decode_utf16_buffer(buffer: &[u8], bytes_used: u32, decode_buf: &mut Vec) -> String { + if bytes_used == 0 || bytes_used as usize > buffer.len() { + return String::new(); + } + if bytes_used < 2 || bytes_used % 2 != 0 { + return String::new(); + } + + let u16_len = bytes_used as usize / 2; + decode_buf.resize(u16_len, 0); + for i in 0..u16_len { + decode_buf[i] = u16::from_le_bytes([buffer[i * 2], buffer[i * 2 + 1]]); + } + + // Strip trailing null terminator + let xml_len = if !decode_buf.is_empty() && decode_buf[u16_len - 1] == 0 { + u16_len - 1 + } else { + u16_len + }; + + if xml_len == 0 { + return String::new(); + } + + String::from_utf16_lossy(&decode_buf[..xml_len]) +} diff --git a/src/sources/windows_event_log/sid_resolver.rs b/src/sources/windows_event_log/sid_resolver.rs new file mode 100644 index 0000000000000..a6e4d7b8072f8 --- /dev/null +++ b/src/sources/windows_event_log/sid_resolver.rs @@ -0,0 +1,160 @@ +use std::num::NonZeroUsize; + +use lru::LruCache; +use windows::Win32::Foundation::{HLOCAL, LocalFree}; +use windows::Win32::Security::Authorization::ConvertStringSidToSidW; +use windows::Win32::Security::{LookupAccountSidW, PSID, SID_NAME_USE}; +use windows::core::{HSTRING, PWSTR}; + +/// Maximum number of SID-to-account name mappings to cache. +const SID_CACHE_CAPACITY: usize = 4096; + +/// Resolves Windows SID strings (e.g. "S-1-5-18") to human-readable account +/// names (e.g. "NT AUTHORITY\SYSTEM") using the Windows `LookupAccountSidW` API. +/// +/// Results are cached in an LRU cache to avoid repeated lookups for the same SID. +pub struct SidResolver { + cache: LruCache>, +} + +impl SidResolver { + pub fn new() -> Self { + Self { + cache: LruCache::new(NonZeroUsize::new(SID_CACHE_CAPACITY).unwrap()), + } + } + + /// Resolve a SID string to "DOMAIN\Username" format. + /// Returns `None` if the SID cannot be resolved (unknown account, invalid SID, etc.). + /// Caches both successful and failed lookups. 
+ pub fn resolve(&mut self, sid_string: &str) -> Option { + if let Some(cached) = self.cache.get(sid_string) { + return cached.clone(); + } + + let result = lookup_sid(sid_string); + self.cache.put(sid_string.to_string(), result.clone()); + result + } +} + +/// Convert a SID string to a PSID via ConvertStringSidToSidW, then call +/// LookupAccountSidW to get the account name. +fn lookup_sid(sid_string: &str) -> Option { + let sid_hstring = HSTRING::from(sid_string); + + // Convert string SID to binary PSID + let mut psid = PSID::default(); + let convert_result = unsafe { ConvertStringSidToSidW(&sid_hstring, &mut psid) }; + if convert_result.is_err() { + return None; + } + + // LookupAccountSidW: first call to get buffer sizes + let mut name_len: u32 = 0; + let mut domain_len: u32 = 0; + let mut sid_type = SID_NAME_USE::default(); + + let _ = unsafe { + LookupAccountSidW( + None, + psid, + PWSTR::null(), + &mut name_len, + PWSTR::null(), + &mut domain_len, + &mut sid_type, + ) + }; + + if name_len == 0 { + unsafe { + let _ = LocalFree(HLOCAL(psid.0)); + } + return None; + } + + // Second call with properly sized buffers + let mut name_buf = vec![0u16; name_len as usize]; + let mut domain_buf = vec![0u16; domain_len as usize]; + + let result = unsafe { + LookupAccountSidW( + None, + psid, + PWSTR(name_buf.as_mut_ptr()), + &mut name_len, + PWSTR(domain_buf.as_mut_ptr()), + &mut domain_len, + &mut sid_type, + ) + }; + + // Free the PSID allocated by ConvertStringSidToSidW + unsafe { + let _ = LocalFree(HLOCAL(psid.0)); + } + + if result.is_err() { + return None; + } + + let name = String::from_utf16_lossy(&name_buf[..name_len as usize]); + let domain = String::from_utf16_lossy(&domain_buf[..domain_len as usize]); + + if domain.is_empty() { + Some(name) + } else { + Some(format!("{domain}\\{name}")) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_sid_resolver_caches_results() { + let mut resolver = SidResolver::new(); + // Well-known SID: 
S-1-5-18 = NT AUTHORITY\SYSTEM + let first = resolver.resolve("S-1-5-18"); + let second = resolver.resolve("S-1-5-18"); + assert_eq!(first, second); + } + + #[test] + fn test_invalid_sid_returns_none() { + let mut resolver = SidResolver::new(); + assert!(resolver.resolve("not-a-sid").is_none()); + assert!(resolver.resolve("").is_none()); + } + + #[test] + fn test_well_known_sids() { + let mut resolver = SidResolver::new(); + + // S-1-5-18 = SYSTEM + if let Some(name) = resolver.resolve("S-1-5-18") { + assert!( + name.contains("SYSTEM"), + "S-1-5-18 should resolve to SYSTEM, got: {name}" + ); + } + + // S-1-5-19 = LOCAL SERVICE + if let Some(name) = resolver.resolve("S-1-5-19") { + assert!( + name.contains("LOCAL SERVICE"), + "S-1-5-19 should resolve to LOCAL SERVICE, got: {name}" + ); + } + + // S-1-5-20 = NETWORK SERVICE + if let Some(name) = resolver.resolve("S-1-5-20") { + assert!( + name.contains("NETWORK SERVICE"), + "S-1-5-20 should resolve to NETWORK SERVICE, got: {name}" + ); + } + } +} diff --git a/src/sources/windows_event_log/subscription.rs b/src/sources/windows_event_log/subscription.rs new file mode 100644 index 0000000000000..dc92713f691d9 --- /dev/null +++ b/src/sources/windows_event_log/subscription.rs @@ -0,0 +1,1275 @@ +use std::{ + collections::HashMap, + num::{NonZeroU32, NonZeroUsize}, + sync::Arc, +}; + +use lru::LruCache; + +use governor::{ + Quota, RateLimiter, + clock::DefaultClock, + state::{InMemoryState, NotKeyed}, +}; +use metrics::{Counter, Gauge, counter, gauge}; +use windows::Win32::Foundation::{CloseHandle, HANDLE, WAIT_OBJECT_0, WAIT_TIMEOUT}; +use windows::Win32::System::EventLog::{ + EVT_HANDLE, EvtClose, EvtNext, EvtOpenChannelConfig, EvtSubscribe, + EvtSubscribeStartAfterBookmark, EvtSubscribeStartAtOldestRecord, EvtSubscribeStrict, + EvtSubscribeToFutureEvents, +}; +#[cfg(test)] +use windows::Win32::System::Threading::SetEvent; +use windows::Win32::System::Threading::{CreateEventW, ResetEvent, WaitForMultipleObjects}; +use 
windows::core::HSTRING; + +use super::{ + bookmark::BookmarkManager, checkpoint::Checkpointer, config::WindowsEventLogConfig, error::*, + metadata, sid_resolver::SidResolver, xml_parser, +}; + +use crate::internal_events::WindowsEventLogBookmarkError; + +/// Maximum number of entries in the EvtFormatMessage result cache. +pub const FORMAT_CACHE_CAPACITY: usize = 10_000; +/// Maximum number of cached publisher metadata handles. +const PUBLISHER_CACHE_CAPACITY: usize = 256; + +/// RAII wrapper for EvtOpenPublisherMetadata handles. +/// Calls EvtClose on drop to prevent handle leaks when evicted from LRU cache. +pub struct PublisherHandle(pub isize); + +impl Drop for PublisherHandle { + fn drop(&mut self) { + if self.0 != 0 { + unsafe { + let _ = EvtClose(EVT_HANDLE(self.0)); + } + } + } +} + +// Win32 error codes extracted from the lower 16 bits of HRESULT. +// Using named constants instead of magic numbers for maintainability. +const ERROR_FILE_NOT_FOUND: u32 = 2; +const ERROR_ACCESS_DENIED: u32 = 5; +const ERROR_NO_MORE_ITEMS: u32 = 259; +const ERROR_EVT_QUERY_RESULT_STALE: u32 = 4317; +const ERROR_EVT_CHANNEL_NOT_FOUND: u32 = 0x3AA1; // 15009 +const ERROR_EVT_INVALID_QUERY: u32 = 15007; +const ERROR_EVT_QUERY_RESULT_INVALID_POSITION: u32 = 0x4239; // 16953 + +/// Per-channel subscription state for pull model. +struct ChannelSubscription { + channel: String, + subscription_handle: EVT_HANDLE, + signal_event: HANDLE, + bookmark: BookmarkManager, + /// Pre-registered counter for events read on this channel. + events_read_counter: Counter, + /// Pre-registered counter for render errors on this channel. + render_errors_counter: Counter, + /// Gauge indicating whether this channel subscription is active (1.0) or failed (0.0). + subscription_active_gauge: Gauge, + /// Gauge tracking the timestamp (unix seconds) of the last event received on this channel. + last_event_timestamp_gauge: Gauge, + /// Gauge tracking total record count in the channel log. 
+ /// SOC teams use `rate(events_read_total)` vs this gauge to detect ingestion lag. + channel_records_gauge: Gauge, +} + +// SAFETY: Same rationale as EventLogSubscription - Windows kernel handles are thread-safe. +unsafe impl Send for ChannelSubscription {} + +/// Result of waiting for events across all channels. +pub enum WaitResult { + /// At least one channel has events available. + EventsAvailable, + /// Timeout expired without any events. + Timeout, + /// Shutdown was signaled. + Shutdown, +} + +/// Pull-model Windows Event Log subscription using EvtSubscribe + signal event + EvtNext. +/// +/// Instead of a callback (push model), we use: +/// 1. `CreateEventW` to create a manual-reset signal per channel +/// 2. `EvtSubscribe` with NULL callback (pull mode) and signal event +/// 3. `WaitForMultipleObjects` to wait for any channel signal or shutdown +/// 4. `EvtNext` to pull events in batches when signaled +/// +/// This eliminates event drops under back pressure because we don't call +/// `EvtNext` again until the pipeline has consumed the current batch. +pub struct EventLogSubscription { + config: Arc, + channels: Vec, + checkpointer: Arc, + rate_limiter: Option>, + shutdown_event: HANDLE, + render_buffer: Vec, + /// Cached EvtOpenPublisherMetadata handles keyed by provider name. + /// Bounded LRU; evicted handles are closed via `PublisherHandle::drop`. + publisher_cache: LruCache, + /// Cached EvtFormatMessage results. Outer key is provider name (looked up + /// via `&str` — zero allocation on the hot path), inner LRU is bounded per provider. + format_cache: HashMap>>, + /// Pre-registered counter for metadata cache hits. + cache_hits_counter: Counter, + /// Pre-registered counter for metadata cache misses. + cache_misses_counter: Counter, + /// SID-to-username resolver with LRU cache. + sid_resolver: SidResolver, + /// Reusable UTF-16 decode buffer to avoid per-event allocations. + decode_buffer: Vec, + /// Round-robin index for fair channel scheduling. 
Rotates the starting + /// channel each pull_events call to prevent a single busy channel + /// (e.g., Security on a domain controller) from starving others. + round_robin_index: usize, +} + +// SAFETY: Windows HANDLE and EVT_HANDLE are kernel objects safe to use across +// threads. In windows 0.58, HANDLE wraps *mut c_void which is !Send/!Sync, +// but the underlying kernel handles are thread-safe. +unsafe impl Send for EventLogSubscription {} + +impl EventLogSubscription { + /// Create a new pull-model subscription for all configured channels. + /// + /// Each channel gets its own signal event and EvtSubscribe handle. + /// A shutdown event is created for clean termination of blocking waits. + pub async fn new( + config: &WindowsEventLogConfig, + checkpointer: Arc, + _acknowledgements: bool, + ) -> Result { + // Create rate limiter if configured + let rate_limiter = if config.events_per_second > 0 { + NonZeroU32::new(config.events_per_second).map(|rate| { + info!( + message = "Enabling rate limiting for Windows Event Log source.", + events_per_second = config.events_per_second + ); + RateLimiter::direct(Quota::per_second(rate)) + }) + } else { + None + }; + + let config = Arc::new(config.clone()); + + // Validate channels exist and are accessible + Self::validate_channels(&config)?; + + // Store as isize while held across await points (HANDLE wraps *mut c_void which is !Send) + let shutdown_event_raw: isize = unsafe { + let h = CreateEventW(None, true, false, None).map_err(|e| { + WindowsEventLogError::ConfigError { + message: format!("Failed to create shutdown event: {e}"), + } + })?; + h.0 as isize + }; + + let mut channel_subscriptions = Vec::with_capacity(config.channels.len()); + + for channel in &config.channels { + // Initialize bookmark from checkpoint or create fresh + let (bookmark, has_valid_checkpoint) = if let Some(checkpoint) = + checkpointer.get(channel).await + { + match BookmarkManager::from_xml(&checkpoint.bookmark_xml) { + Ok(bm) => { + info!( 
+ message = "Resuming from checkpoint bookmark.", + channel = %channel + ); + (bm, true) + } + Err(e) => { + warn!( + message = "Corrupted bookmark XML in checkpoint, creating fresh bookmark. Potential re-delivery of events.", + channel = %channel, + error = %e + ); + (BookmarkManager::new()?, false) + } + } + } else { + info!( + message = "No checkpoint found, creating fresh bookmark.", + channel = %channel + ); + (BookmarkManager::new()?, false) + }; + + // Create manual-reset signal event, initially signaled. + // Initially signaled ensures the first iteration drains any buffered events. + // Manual reset prevents missing signals between WaitForMultipleObjects return + // and EvtNext draining. + let signal_event = unsafe { + CreateEventW(None, true, true, None).map_err(|e| { + WindowsEventLogError::ConfigError { + message: format!( + "Failed to create signal event for channel '{channel}': {e}" + ), + } + })? + }; + + let channel_hstring = HSTRING::from(channel.as_str()); + let query = Self::build_xpath_query(&config)?; + let query_hstring = HSTRING::from(query.clone()); + + // Determine subscription flags. + // When resuming from a bookmark, OR in EvtSubscribeStrict (0x10000) so that + // Windows fails explicitly if the bookmark position is stale/invalid, + // rather than silently falling back to oldest-record. 
+ let subscription_flags = if has_valid_checkpoint { + EvtSubscribeStartAfterBookmark.0 | EvtSubscribeStrict.0 + } else if config.read_existing_events { + EvtSubscribeStartAtOldestRecord.0 + } else { + EvtSubscribeToFutureEvents.0 + }; + + let fallback_flags = if config.read_existing_events { + EvtSubscribeStartAtOldestRecord.0 + } else { + EvtSubscribeToFutureEvents.0 + }; + + debug!( + message = "Creating pull-mode subscription.", + channel = %channel, + query = %query, + has_valid_checkpoint = has_valid_checkpoint, + read_existing = config.read_existing_events, + flags = format!("{:#x}", subscription_flags) + ); + + // EvtSubscribe with signal event and NULL callback = pull mode + let bookmark_handle = bookmark.as_handle(); + let subscription_result = unsafe { + if has_valid_checkpoint { + let strict_result = EvtSubscribe( + None, + signal_event, + &channel_hstring, + &query_hstring, + bookmark_handle, + None, // NULL context = pull mode + None, // NULL callback = pull mode + subscription_flags, + ); + match strict_result { + Ok(handle) => Ok(handle), + Err(e) => { + warn!( + message = "Strict bookmark subscribe failed, retrying without bookmark. 
Potential re-delivery of events.", + channel = %channel, + error = %e, + fallback_flags = format!("{:#x}", fallback_flags) + ); + EvtSubscribe( + None, + signal_event, + &channel_hstring, + &query_hstring, + None, // No bookmark for fallback + None, + None, + fallback_flags, + ) + } + } + } else { + EvtSubscribe( + None, + signal_event, + &channel_hstring, + &query_hstring, + None, // No bookmark for fresh start + None, // NULL context + None, // NULL callback + subscription_flags, + ) + } + }; + + match subscription_result { + Ok(subscription_handle) => { + info!( + message = "Pull-mode subscription created successfully.", + channel = %channel + ); + counter!( + "windows_event_log_subscriptions_total", + "channel" => channel.clone() + ) + .increment(1); + let subscription_active_gauge = gauge!( + "windows_event_log_subscription_active", + "channel" => channel.clone() + ); + subscription_active_gauge.set(1.0); + + channel_subscriptions.push(ChannelSubscription { + channel: channel.clone(), + events_read_counter: counter!( + "windows_event_log_events_read_total", + "channel" => channel.clone() + ), + render_errors_counter: counter!( + "windows_event_log_render_errors_total", + "channel" => channel.clone() + ), + subscription_active_gauge, + last_event_timestamp_gauge: gauge!( + "windows_event_log_last_event_timestamp_seconds", + "channel" => channel.clone() + ), + channel_records_gauge: gauge!( + "windows_event_log_channel_records_total", + "channel" => channel.clone() + ), + subscription_handle, + signal_event, + bookmark, + }); + } + Err(e) => { + let error_code = (e.code().0 as u32) & 0xFFFF; + if error_code == ERROR_EVT_CHANNEL_NOT_FOUND + || error_code == ERROR_EVT_INVALID_QUERY + { + warn!( + message = "Skipping channel (not found or invalid query).", + channel = %channel, + error_code = error_code + ); + unsafe { + let _ = CloseHandle(signal_event); + } + continue; + } else if error_code == ERROR_ACCESS_DENIED { + warn!( + message = "Skipping channel due to 
access denied.", + channel = %channel + ); + unsafe { + let _ = CloseHandle(signal_event); + } + continue; + } else { + // Clean up already-created subscriptions on failure + for sub in channel_subscriptions { + unsafe { + let _ = EvtClose(sub.subscription_handle); + let _ = CloseHandle(sub.signal_event); + } + } + unsafe { + let _ = + CloseHandle(HANDLE(shutdown_event_raw as *mut std::ffi::c_void)); + } + return Err(WindowsEventLogError::CreateSubscriptionError { source: e }); + } + } + } + } + + // Verify we subscribed to at least one channel + if channel_subscriptions.is_empty() { + unsafe { + let _ = CloseHandle(HANDLE(shutdown_event_raw as *mut std::ffi::c_void)); + } + return Err(WindowsEventLogError::ConfigError { + message: "No channels could be subscribed to. All channels may be inaccessible or direct/analytic channels.".into(), + }); + } + + info!( + message = "Successfully subscribed to channels (pull mode).", + channel_count = channel_subscriptions.len() + ); + + let shutdown_event = HANDLE(shutdown_event_raw as *mut std::ffi::c_void); + Ok(Self { + config, + channels: channel_subscriptions, + checkpointer, + rate_limiter, + shutdown_event, + render_buffer: vec![0u8; 16384], + publisher_cache: LruCache::new(NonZeroUsize::new(PUBLISHER_CACHE_CAPACITY).unwrap()), + format_cache: HashMap::new(), + cache_hits_counter: counter!("windows_event_log_cache_hits_total"), + cache_misses_counter: counter!("windows_event_log_cache_misses_total"), + sid_resolver: SidResolver::new(), + decode_buffer: vec![0u16; 8192], + round_robin_index: 0, + }) + } + + /// Wait for events to become available on any channel, or for shutdown. + /// + /// Uses `WaitForMultipleObjects` via `spawn_blocking` to avoid blocking the + /// Tokio runtime. The wait array includes all channel signal events plus the + /// shutdown event. 
+ pub fn wait_for_events_blocking(&self, timeout_ms: u32) -> WaitResult { + // Build wait handle array: [channel0_signal, channel1_signal, ..., shutdown_event] + let mut handles: Vec = self.channels.iter().map(|c| c.signal_event).collect(); + handles.push(self.shutdown_event); + + let result = unsafe { WaitForMultipleObjects(&handles, false, timeout_ms) }; + + let shutdown_index = (self.channels.len()) as u32; + + match result { + r if r == WAIT_TIMEOUT => WaitResult::Timeout, + r if r.0 < WAIT_OBJECT_0.0 + shutdown_index => WaitResult::EventsAvailable, + r if r.0 == WAIT_OBJECT_0.0 + shutdown_index => WaitResult::Shutdown, + _ => { + // WAIT_FAILED or unexpected - treat as timeout to avoid tight loop + warn!( + message = "WaitForMultipleObjects returned unexpected result.", + result = result.0 + ); + WaitResult::Timeout + } + } + } + + /// Pull events from all signaled channels with fair scheduling. + /// + /// Each channel gets a per-channel budget of `max_events / num_channels` + /// to prevent a single busy channel (e.g., Security) from starving others. + /// The starting channel rotates each call via round-robin. Channels that + /// don't use their budget simply leave slots unused — the next pull_events + /// call reclaims them naturally since the signal stays set. + /// + /// # At-least-once delivery semantics + /// + /// If a bookmark update fails mid-batch, events processed *before* the + /// failure are still returned and sent downstream, but the bookmark position + /// does not advance. On restart, those events will be re-read from the + /// channel, resulting in duplicates. This is an intentional trade-off: + /// at-least-once delivery is preferable to data loss. 
+ pub fn pull_events( + &mut self, + max_events: usize, + ) -> Result, WindowsEventLogError> { + let mut all_events = Vec::with_capacity(max_events.min(1000)); + let num_channels = self.channels.len().max(1); + let per_channel_budget = (max_events / num_channels).max(1); + let start = self.round_robin_index % num_channels; + self.round_robin_index = self.round_robin_index.wrapping_add(1); + + for i in 0..num_channels { + let channel_idx = (start + i) % num_channels; + let channel_sub = &mut self.channels[channel_idx]; + let channel_limit = per_channel_budget.min(max_events.saturating_sub(all_events.len())); + + if channel_limit == 0 { + break; + } + + let mut channel_drained = false; + let mut bookmark_failed = false; + let mut channel_count = 0usize; + + // Drain loop: keep calling EvtNext until ERROR_NO_MORE_ITEMS or channel budget. + // Only reset the signal once the channel is fully drained; if we hit the + // budget limit the signal stays set so WaitForMultipleObjects returns immediately. 
+ 'drain: loop { + if channel_count >= channel_limit { + break; + } + + let batch_size = (channel_limit - channel_count).min(100); + let mut event_handles: Vec = vec![0isize; batch_size]; + let mut returned: u32 = 0; + + let result = unsafe { + EvtNext( + channel_sub.subscription_handle, + &mut event_handles, + 0, + 0, + &mut returned, + ) + }; + + if let Err(err) = result { + let code = (err.code().0 as u32) & 0xFFFF; + if code == ERROR_NO_MORE_ITEMS { + channel_drained = true; + break; + } + if code == ERROR_EVT_QUERY_RESULT_STALE { + debug!( + message = "Channel subscription ended.", + channel = %channel_sub.channel + ); + channel_drained = true; + break; + } + if code == ERROR_EVT_QUERY_RESULT_INVALID_POSITION { + warn!( + message = "Event log channel was cleared or query position invalidated, attempting re-subscription.", + channel = %channel_sub.channel + ); + match Self::resubscribe_channel(channel_sub, &self.config) { + Ok(()) => { + info!( + message = "Re-subscription succeeded after stale query.", + channel = %channel_sub.channel + ); + // Retry from fresh subscription — the signal will fire again + channel_drained = true; + break; + } + Err(e) => { + warn!( + message = "Re-subscription failed, will retry next cycle.", + channel = %channel_sub.channel, + error = %e + ); + channel_sub.subscription_active_gauge.set(0.0); + channel_drained = true; + break; + } + } + } + return Err(WindowsEventLogError::PullEventsError { + channel: channel_sub.channel.clone(), + source: err, + }); + } + + if returned == 0 { + channel_drained = true; + break; + } + + channel_sub.events_read_counter.increment(returned as u64); + channel_sub + .last_event_timestamp_gauge + .set(chrono::Utc::now().timestamp() as f64); + + let batch_handles = &event_handles[..returned as usize]; + for (idx, &raw_handle) in batch_handles.iter().enumerate() { + let event_handle = EVT_HANDLE(raw_handle); + + match super::render::render_event_xml( + &mut self.render_buffer, + &mut self.decode_buffer, 
+ event_handle, + ) { + Ok(xml) => { + // Single-pass: parse all System fields in one traversal + let system_fields = xml_parser::parse_system_section(&xml); + + // Early pre-filter: discard non-matching event IDs before + // the expensive resolve_event_metadata / format_event_message + // calls. This guarantees improved performance even when + // XPath-level filtering is not applied (e.g. large ID lists). + if let Some(ref only_ids) = self.config.only_event_ids + && !only_ids.contains(&system_fields.event_id) + { + counter!("windows_event_log_events_filtered_total", "reason" => "event_id_prefilter") + .increment(1); + unsafe { + let _ = EvtClose(event_handle); + } + continue; + } + if self + .config + .ignore_event_ids + .contains(&system_fields.event_id) + { + counter!("windows_event_log_events_filtered_total", "reason" => "event_id_prefilter") + .increment(1); + unsafe { + let _ = EvtClose(event_handle); + } + continue; + } + + let channel_name = if system_fields.channel.is_empty() { + channel_sub.channel.clone() + } else { + system_fields.channel.clone() + }; + let provider_name = system_fields.provider_name.clone(); + let task_val = system_fields.task as u64; + let opcode_val = system_fields.opcode as u64; + let keywords_val = system_fields.keywords; + + let (task_name, opcode_name, keyword_names) = + if !provider_name.is_empty() { + metadata::resolve_event_metadata( + &mut self.publisher_cache, + &mut self.format_cache, + &self.cache_hits_counter, + &self.cache_misses_counter, + event_handle, + &provider_name, + task_val, + opcode_val, + keywords_val, + ) + } else { + (None, None, Vec::new()) + }; + + let rendered_message = + if self.config.render_message && !provider_name.is_empty() { + metadata::format_event_message( + &mut self.publisher_cache, + event_handle, + &provider_name, + ) + } else { + None + }; + + if let Ok(Some(mut event)) = xml_parser::build_event( + xml, + &channel_name, + &self.config, + rendered_message, + system_fields, + ) { + 
event.task_name = task_name; + event.opcode_name = opcode_name; + event.keyword_names = keyword_names; + + // Resolve SID to human-readable account name + if let Some(ref sid) = event.user_id { + if let Some(account_name) = self.sid_resolver.resolve(sid) { + event.user_name = Some(account_name); + } + } + + if let Err(e) = channel_sub.bookmark.update(event_handle) { + emit!(WindowsEventLogBookmarkError { + channel: channel_sub.channel.clone(), + error: e.to_string(), + }); + bookmark_failed = true; + // Events already in all_events will still be delivered + // (at-least-once semantics — see doc comment on pull_events). + // Close current handle normally + unsafe { + let _ = EvtClose(event_handle); + } + // Close remaining unprocessed handles to prevent leak + for &h in &batch_handles[idx + 1..] { + unsafe { + let _ = EvtClose(EVT_HANDLE(h)); + } + } + break 'drain; + } + all_events.push(event); + channel_count += 1; + } + } + Err(e) => { + channel_sub.render_errors_counter.increment(1); + warn!( + message = "Failed to render event XML.", + channel = %channel_sub.channel, + batch_index = idx, + event_handle = raw_handle, + error = %e + ); + } + } + + unsafe { + let _ = EvtClose(event_handle); + } + } + } + + if channel_drained && !bookmark_failed { + unsafe { + let _ = ResetEvent(channel_sub.signal_event); + } + + // Update channel record count gauge for lag detection. + super::render::update_channel_records( + &channel_sub.channel, + &channel_sub.channel_records_gauge, + ); + } + } + + Ok(all_events) + } + + /// Re-subscribe a channel after its query position becomes invalid + /// (e.g., an admin cleared the event log). Closes the old subscription + /// handle and creates a new one using the current bookmark. 
+ fn resubscribe_channel( + channel_sub: &mut ChannelSubscription, + config: &WindowsEventLogConfig, + ) -> Result<(), WindowsEventLogError> { + // Close the stale subscription handle + unsafe { + let _ = EvtClose(channel_sub.subscription_handle); + } + + let channel_hstring = HSTRING::from(channel_sub.channel.as_str()); + let query = Self::build_xpath_query(config)?; + let query_hstring = HSTRING::from(query); + + let bookmark_handle = channel_sub.bookmark.as_handle(); + let has_bookmark = bookmark_handle.0 != 0; + + // Use EvtSubscribeStrict when resuming from bookmark so Windows fails + // explicitly if the bookmark position is stale, rather than silently + // falling back to oldest-record. + let subscription_flags = if has_bookmark { + EvtSubscribeStartAfterBookmark.0 | EvtSubscribeStrict.0 + } else { + EvtSubscribeStartAtOldestRecord.0 + }; + + let fallback_flags = if config.read_existing_events { + EvtSubscribeStartAtOldestRecord.0 + } else { + EvtSubscribeToFutureEvents.0 + }; + + let new_handle = unsafe { + if has_bookmark { + let strict_result = EvtSubscribe( + None, + channel_sub.signal_event, + &channel_hstring, + &query_hstring, + bookmark_handle, + None, + None, + subscription_flags, + ); + match strict_result { + Ok(handle) => Ok(handle), + Err(e) => { + warn!( + message = "Strict bookmark resubscribe failed, retrying without bookmark. 
Potential re-delivery of events.", + channel = %channel_sub.channel, + error = %e, + fallback_flags = format!("{:#x}", fallback_flags) + ); + EvtSubscribe( + None, + channel_sub.signal_event, + &channel_hstring, + &query_hstring, + None, + None, + None, + fallback_flags, + ) + } + } + } else { + EvtSubscribe( + None, + channel_sub.signal_event, + &channel_hstring, + &query_hstring, + None, + None, + None, + subscription_flags, + ) + } + } + .map_err(|e| WindowsEventLogError::CreateSubscriptionError { source: e })?; + + channel_sub.subscription_handle = new_handle; + channel_sub.subscription_active_gauge.set(1.0); + + counter!( + "windows_event_log_resubscriptions_total", + "channel" => channel_sub.channel.clone() + ) + .increment(1); + + Ok(()) + } + + /// Returns the raw shutdown event handle value for use in the async shutdown watcher. + /// + /// The returned pointer is the underlying value of the Windows HANDLE. It can be + /// safely copied and used from another thread to call `SetEvent` because Windows + /// kernel objects are reference-counted and remain valid as long as at least one + /// handle is open (which this subscription maintains until Drop). + pub const fn shutdown_event_raw(&self) -> *mut std::ffi::c_void { + self.shutdown_event.0 + } + + /// Returns a reference to the rate limiter, if configured. + pub const fn rate_limiter( + &self, + ) -> Option<&RateLimiter> { + self.rate_limiter.as_ref() + } + + /// Returns (total_channels, active_channels) for health reporting. + pub fn channel_health_summary(&self) -> (usize, usize) { + let total = self.channels.len(); + // A channel is considered active if its subscription handle is non-null + let active = self + .channels + .iter() + .filter(|c| c.subscription_handle.0 != 0) + .count(); + (total, active) + } + + /// Flush all bookmarks to checkpoint storage. + /// + /// Call this before shutdown to ensure no events are lost. 
+ pub async fn flush_bookmarks(&mut self) -> Result<(), WindowsEventLogError> { + debug!(message = "Flushing bookmarks to checkpoint storage."); + + let bookmark_xmls: Vec<(String, String)> = self + .channels + .iter() + .filter_map( + |sub| match BookmarkManager::serialize_handle(sub.bookmark.as_handle()) { + Ok(xml) if xml_parser::is_valid_bookmark_xml(&xml) => { + Some((sub.channel.clone(), xml)) + } + Ok(_) => None, + Err(e) => { + emit!(WindowsEventLogBookmarkError { + channel: sub.channel.clone(), + error: e.to_string(), + }); + None + } + }, + ) + .collect(); + + if !bookmark_xmls.is_empty() { + self.checkpointer.set_batch(bookmark_xmls).await?; + counter!("windows_event_log_checkpoint_writes_total").increment(1); + } + + debug!(message = "Bookmark flush complete."); + Ok(()) + } + + /// Get the current bookmark XML for a specific channel. + /// + /// Used for acknowledgment-based checkpointing where the bookmark + /// state needs to be captured when events are read (not when they're acknowledged). 
+    pub fn get_bookmark_xml(&self, channel: &str) -> Option<String> {
+        self.channels
+            .iter()
+            .find(|sub| sub.channel == channel)
+            .and_then(
+                |sub| match BookmarkManager::serialize_handle(sub.bookmark.as_handle()) {
+                    Ok(xml) if xml_parser::is_valid_bookmark_xml(&xml) => Some(xml),
+                    _ => None,
+                },
+            )
+    }
+
+    fn build_xpath_query(config: &WindowsEventLogConfig) -> Result<String, WindowsEventLogError> {
+        build_xpath_query(config)
+    }
+
+    fn validate_channels(config: &WindowsEventLogConfig) -> Result<(), WindowsEventLogError> {
+        for channel in &config.channels {
+            let channel_hstring = HSTRING::from(channel.as_str());
+            let channel_handle = unsafe { EvtOpenChannelConfig(None, &channel_hstring, 0) };
+
+            match channel_handle {
+                Ok(handle) => {
+                    if let Err(e) = unsafe { EvtClose(handle) } {
+                        warn!(message = "Failed to close channel config handle.", error = %e);
+                    }
+                }
+                Err(e) => {
+                    let error_code = (e.code().0 as u32) & 0xFFFF;
+                    if error_code == ERROR_FILE_NOT_FOUND
+                        || error_code == ERROR_EVT_CHANNEL_NOT_FOUND
+                        || error_code == ERROR_EVT_INVALID_QUERY
+                    {
+                        // Non-existent channels are skipped during EvtSubscribe below,
+                        // so warn here rather than failing the entire source.
+                        warn!(
+                            message = "Channel not found, will be skipped.",
+                            channel = %channel
+                        );
+                        continue;
+                    } else if error_code == ERROR_ACCESS_DENIED {
+                        warn!(
+                            message = "Channel access denied, will be skipped.",
+                            channel = %channel
+                        );
+                        continue;
+                    } else {
+                        return Err(WindowsEventLogError::OpenChannelError {
+                            channel: channel.clone(),
+                            source: e,
+                        });
+                    }
+                }
+            }
+        }
+
+        Ok(())
+    }
+}
+
+/// Maximum XPath query length supported by Windows Event Log API.
+/// Queries exceeding this limit fall back to `"*"` (all events).
+const XPATH_MAX_LENGTH: usize = 4096;
+
+/// Build an XPath query from config, incorporating `only_event_ids` when no
+/// explicit `event_query` is set.
+///
+/// When `only_event_ids` is configured and no custom `event_query` is provided,
+/// generates a query like `*[System[EventID=4624 or EventID=4625]]` so that
+/// the Windows API filters events at the source, avoiding the cost of pulling,
+/// rendering, and discarding non-matching events.
+///
+/// If the generated query exceeds [`XPATH_MAX_LENGTH`] (4096 chars), falls back
+/// to `"*"` and lets the downstream filter in `build_event()` handle it.
+pub(super) fn build_xpath_query(
+    config: &WindowsEventLogConfig,
+) -> Result<String, WindowsEventLogError> {
+    // Explicit event_query always takes precedence.
+    if let Some(ref custom_query) = config.event_query {
+        return Ok(custom_query.clone());
+    }
+
+    // Generate XPath from only_event_ids if present and non-empty.
+    if let Some(ref ids) = config.only_event_ids
+        && !ids.is_empty()
+    {
+        let query = if ids.len() == 1 {
+            format!("*[System[EventID={}]]", ids[0])
+        } else {
+            let predicates: Vec<String> = ids.iter().map(|id| format!("EventID={id}")).collect();
+            format!("*[System[{}]]", predicates.join(" or "))
+        };
+
+        if query.len() <= XPATH_MAX_LENGTH {
+            return Ok(query);
+        }
+        // Query too long — fall back to wildcard and rely on
+        // the in-process filter in build_event().
+        warn!(
+            message = "Generated XPath query exceeds maximum length, falling back to wildcard.",
+            query_len = query.len(),
+            max_len = XPATH_MAX_LENGTH,
+            num_event_ids = ids.len(),
+        );
+    }
+
+    Ok("*".to_string())
+}
+
+impl Drop for EventLogSubscription {
+    fn drop(&mut self) {
+        // Close subscription handles and signal events
+        for sub in &self.channels {
+            unsafe {
+                let _ = EvtClose(sub.subscription_handle);
+                let _ = CloseHandle(sub.signal_event);
+            }
+        }
+        // Publisher metadata handles are closed automatically by PublisherHandle::drop
+        // when the LRU cache is dropped.
+
+        // Close shutdown event
+        unsafe {
+            let _ = CloseHandle(self.shutdown_event);
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    async fn create_test_checkpointer() -> (Arc<Checkpointer>, tempfile::TempDir) {
+        let temp_dir = tempfile::TempDir::new().unwrap();
+        let checkpointer = Arc::new(Checkpointer::new(temp_dir.path()).await.unwrap());
+        (checkpointer, temp_dir)
+    }
+
+    #[test]
+    fn test_rate_limiter_configuration() {
+        let mut config = WindowsEventLogConfig::default();
+        assert_eq!(config.events_per_second, 0);
+
+        config.events_per_second = 1000;
+        assert_eq!(config.events_per_second, 1000);
+    }
+
+    #[tokio::test]
+    async fn test_rate_limiter_disabled_by_default() {
+        let config = WindowsEventLogConfig::default();
+        assert_eq!(
+            config.events_per_second, 0,
+            "Rate limiting should be disabled by default"
+        );
+    }
+
+    /// Test pull subscription creation and basic operation
+    #[tokio::test]
+    async fn test_pull_subscription_creation() {
+        let mut config = WindowsEventLogConfig::default();
+        config.channels = vec!["Application".to_string()];
+        config.event_timeout_ms = 1000;
+
+        let (checkpointer, _temp_dir) = create_test_checkpointer().await;
+
+        let subscription = EventLogSubscription::new(&config, checkpointer, false).await;
+        assert!(
+            subscription.is_ok(),
+            "Pull subscription creation should succeed: {:?}",
+            subscription.err()
+        );
+
+        let sub = subscription.unwrap();
+        assert_eq!(
+            sub.channels.len(),
+            1,
+            "Should have one channel subscription"
+        );
+    }
+
+    /// Test that wait_for_events_blocking returns timeout or events available
+    #[tokio::test]
+    async fn test_wait_for_events_timeout() {
+        let mut config = WindowsEventLogConfig::default();
+        config.channels = vec!["Application".to_string()];
+        config.read_existing_events = false;
+        config.event_timeout_ms = 100;
+
+        let (checkpointer, _temp_dir) = create_test_checkpointer().await;
+
+        let subscription = EventLogSubscription::new(&config, checkpointer, false)
+            .await
+            .expect("Subscription 
creation should succeed"); + + // Use ownership transfer pattern for spawn_blocking + let (subscription, result) = tokio::task::spawn_blocking(move || { + let r = subscription.wait_for_events_blocking(100); + (subscription, r) + }) + .await + .unwrap(); + + // The first call may return EventsAvailable since signals are initially signaled. + // That's expected behavior per the pull model design. + match result { + WaitResult::EventsAvailable | WaitResult::Timeout => {} + WaitResult::Shutdown => panic!("Should not get shutdown"), + } + + // Keep subscription alive until end of test + drop(subscription); + } + + /// Test that signal_shutdown wakes a waiting thread + #[tokio::test] + async fn test_shutdown_signal_wakes_wait() { + let mut config = WindowsEventLogConfig::default(); + config.channels = vec!["Application".to_string()]; + config.event_timeout_ms = 500; + + let (checkpointer, _temp_dir) = create_test_checkpointer().await; + + let subscription = EventLogSubscription::new(&config, checkpointer, false) + .await + .expect("Subscription creation should succeed"); + + // First drain the initially-signaled state using ownership transfer + let (subscription, _) = tokio::task::spawn_blocking(move || { + let r = subscription.wait_for_events_blocking(50); + (subscription, r) + }) + .await + .unwrap(); + + let shutdown_event_raw = subscription.shutdown_event_raw() as isize; + + let wait_handle = tokio::task::spawn_blocking(move || { + let r = subscription.wait_for_events_blocking(30000); + (subscription, r) + }); + + tokio::time::sleep(tokio::time::Duration::from_millis(50)).await; + + unsafe { + let handle = HANDLE(shutdown_event_raw as *mut std::ffi::c_void); + let _ = SetEvent(handle); + } + + let (subscription, result) = wait_handle.await.unwrap(); + match result { + WaitResult::Shutdown => {} // Expected + WaitResult::EventsAvailable => { + // Acceptable - there may have been real events + } + WaitResult::Timeout => { + panic!("Should not timeout - shutdown should 
have woken the wait"); + } + } + + drop(subscription); + } + + /// Test pull_events with read_existing_events=true + #[tokio::test] + async fn test_pull_events_returns_events() { + let mut config = WindowsEventLogConfig::default(); + config.channels = vec!["Application".to_string()]; + config.read_existing_events = true; + config.event_timeout_ms = 2000; + + let (checkpointer, _temp_dir) = create_test_checkpointer().await; + + let subscription = EventLogSubscription::new(&config, checkpointer, false) + .await + .expect("Subscription creation should succeed"); + + // Wait and pull using ownership transfer pattern + let (mut subscription, wait_result) = tokio::task::spawn_blocking(move || { + let r = subscription.wait_for_events_blocking(2000); + (subscription, r) + }) + .await + .unwrap(); + + match wait_result { + WaitResult::EventsAvailable => { + let events = subscription.pull_events(100).unwrap(); + assert!( + !events.is_empty(), + "With read_existing_events=true, should get historical events" + ); + } + WaitResult::Timeout => { + // Might happen on a system with empty Application log + } + WaitResult::Shutdown => panic!("Unexpected shutdown"), + } + } + + /// Test multiple concurrent pull subscriptions + #[tokio::test] + async fn test_multiple_concurrent_subscriptions() { + let mut config1 = WindowsEventLogConfig::default(); + config1.channels = vec!["Application".to_string()]; + config1.event_timeout_ms = 1000; + + let mut config2 = WindowsEventLogConfig::default(); + config2.channels = vec!["System".to_string()]; + config2.event_timeout_ms = 1000; + + let (checkpointer1, _temp_dir1) = create_test_checkpointer().await; + let (checkpointer2, _temp_dir2) = create_test_checkpointer().await; + + let sub1 = EventLogSubscription::new(&config1, checkpointer1, false) + .await + .expect("Subscription 1 (Application) should succeed"); + let sub2 = EventLogSubscription::new(&config2, checkpointer2, false) + .await + .expect("Subscription 2 (System) should succeed"); + + 
// Both should be independently functional + assert_eq!(sub1.channels.len(), 1); + assert_eq!(sub2.channels.len(), 1); + assert_eq!(sub1.channels[0].channel, "Application"); + assert_eq!(sub2.channels[0].channel, "System"); + } + + /// Test read_existing_events=false only receives future events + #[tokio::test] + async fn test_read_existing_events_false_only_receives_future_events() { + use chrono::Utc; + + let mut config = WindowsEventLogConfig::default(); + config.channels = vec!["Application".to_string()]; + config.read_existing_events = false; + config.event_timeout_ms = 500; + + let (checkpointer, _temp_dir) = create_test_checkpointer().await; + let subscription_start_time = Utc::now(); + + let mut subscription = EventLogSubscription::new(&config, checkpointer, false) + .await + .expect("Subscription creation should succeed"); + + // Brief wait then pull + tokio::time::sleep(tokio::time::Duration::from_millis(200)).await; + + let events = subscription.pull_events(100).unwrap_or_default(); + + let tolerance = chrono::Duration::seconds(5); + let earliest_allowed = subscription_start_time - tolerance; + + for event in &events { + assert!( + event.time_created >= earliest_allowed, + "Event timestamp {} is before subscription start time {} (minus tolerance). \ + read_existing_events=false may not be respected. Event ID: {}, Record ID: {}", + event.time_created, + subscription_start_time, + event.event_id, + event.record_id + ); + } + } + + /// Test that subscription gracefully handles an invalid/corrupted bookmark + /// from a checkpoint, falling back to a fresh bookmark without crashing. 
+    #[tokio::test]
+    async fn test_checkpoint_with_invalid_bookmark_falls_back_gracefully() {
+        let temp_dir = tempfile::TempDir::new().unwrap();
+        let checkpointer = Arc::new(Checkpointer::new(temp_dir.path()).await.unwrap());
+
+        let fake_bookmark = r#"<NotARealBookmark>corrupted</NotARealBookmark>"#;
+
+        checkpointer
+            .set("Application".to_string(), fake_bookmark.to_string())
+            .await
+            .expect("Should be able to set checkpoint");
+
+        let mut config = WindowsEventLogConfig::default();
+        config.channels = vec!["Application".to_string()];
+        config.read_existing_events = true;
+        config.event_timeout_ms = 500;
+
+        // The subscription should succeed even with a corrupted/invalid bookmark,
+        // gracefully falling back to a fresh bookmark.
+        let mut subscription = EventLogSubscription::new(&config, checkpointer, false)
+            .await
+            .expect("Subscription should succeed even with invalid bookmark checkpoint");
+
+        tokio::time::sleep(tokio::time::Duration::from_millis(200)).await;
+
+        // Just verify we can pull events without panicking.
+        // The bookmark format above is not a real Windows bookmark, so the
+        // subscription will fall back to reading from scratch. We only assert
+        // that the subscription is functional.
+ let _events = subscription.pull_events(100).unwrap_or_default(); + } +} diff --git a/src/sources/windows_event_log/tests.rs b/src/sources/windows_event_log/tests.rs new file mode 100644 index 0000000000000..c5801d964c2c6 --- /dev/null +++ b/src/sources/windows_event_log/tests.rs @@ -0,0 +1,1648 @@ +use std::{collections::HashMap, time::Duration}; + +use chrono::Utc; +use vector_lib::config::LogNamespace; +use vrl::value::Value; + +use super::{config::*, error::*, parser::*, xml_parser::*}; +use crate::{ + config::SourceConfig, + test_util::components::{SOURCE_TAGS, run_and_assert_source_compliance}, +}; + +fn create_test_config() -> WindowsEventLogConfig { + WindowsEventLogConfig { + channels: vec!["System".to_string(), "Application".to_string()], + event_query: None, + connection_timeout_secs: 30, + read_existing_events: false, + batch_size: 10, + include_xml: false, + event_data_format: HashMap::new(), + ignore_event_ids: vec![], + only_event_ids: None, + max_event_age_secs: None, + event_timeout_ms: 5000, + log_namespace: Some(false), + field_filter: FieldFilter::default(), + data_dir: None, // Use Vector's global data_dir + events_per_second: 0, + max_event_data_length: 0, + checkpoint_interval_secs: 5, + acknowledgements: Default::default(), + render_message: false, + } +} + +/// Creates a realistic Security audit event (4624 = successful logon) for integration-level tests. +/// Note: parser.rs has its own simpler create_test_event() for unit testing parser logic. 
+fn create_test_event() -> WindowsEvent {
+    let mut event_data = HashMap::new();
+    event_data.insert("TargetUserName".to_string(), "admin".to_string());
+    event_data.insert("LogonType".to_string(), "2".to_string());
+
+    WindowsEvent {
+        record_id: 12345,
+        event_id: 4624,
+        level: 4,
+        task: 12544,
+        opcode: 0,
+        keywords: 0x8020000000000000,
+        time_created: Utc::now(),
+        provider_name: "Microsoft-Windows-Security-Auditing".to_string(),
+        provider_guid: Some("{54849625-5478-4994-a5ba-3e3b0328c30d}".to_string()),
+        channel: "Security".to_string(),
+        computer: "WIN-SERVER-01".to_string(),
+        user_id: Some("S-1-5-18".to_string()),
+        process_id: 716,
+        thread_id: 796,
+        activity_id: Some("{b25f4adf-d920-0000-0000-000000000000}".to_string()),
+        related_activity_id: None,
+        raw_xml: r#"
+            <Event xmlns="http://schemas.microsoft.com/win/2004/08/events/event">
+                <System>
+                    <EventID>4624</EventID>
+                    <Version>0</Version>
+                    <Task>12544</Task>
+                    <Opcode>0</Opcode>
+                    <Keywords>0x8020000000000000</Keywords>
+                    <TimeCreated SystemTime="2024-01-01T00:00:00.000000000Z"/>
+                    <EventRecordID>12345</EventRecordID>
+                    <Correlation ActivityID="{b25f4adf-d920-0000-0000-000000000000}"/>
+                    <Execution ProcessID="716" ThreadID="796"/>
+                    <Channel>Security</Channel>
+                    <Computer>WIN-SERVER-01</Computer>
+                    <Security UserID="S-1-5-18"/>
+                </System>
+                <EventData>
+                    <Data Name="TargetUserName">admin</Data>
+                    <Data Name="LogonType">2</Data>
+                </EventData>
+            </Event>"#.to_string(),
+        rendered_message: Some("An account was successfully logged on.".to_string()),
+        event_data,
+        user_data: HashMap::new(),
+        task_name: None,
+        opcode_name: None,
+        keyword_names: Vec::new(),
+        user_name: None,
+        version: Some(1),
+        qualifiers: Some(0),
+        string_inserts: vec!["admin".to_string(), "2".to_string()],
+    }
+}
+
+#[cfg(test)]
+mod config_tests {
+    use super::*;
+    use serde_json;
+
+    #[test]
+    fn test_default_config_creation() {
+        let config = WindowsEventLogConfig::default();
+
+        assert_eq!(config.channels, vec!["System", "Application"]);
+        assert_eq!(config.connection_timeout_secs, 30);
+        assert_eq!(config.event_timeout_ms, 5000);
+        assert!(!config.read_existing_events);
+        assert_eq!(config.batch_size, 100);
+        assert!(!config.include_xml);
+        assert!(config.render_message);
+        assert!(config.field_filter.include_system_fields);
+        assert!(config.field_filter.include_event_data);
+        assert!(config.field_filter.include_user_data);
+    }
+
+    #[test]
+    fn generate_config() {
+        crate::test_util::test_generate_config::<WindowsEventLogConfig>();
+    }
+
+    #[test]
+    fn 
test_config_validation_success() { + let config = create_test_config(); + assert!(config.validate().is_ok()); + } + + #[test] + fn test_config_validation_empty_channels() { + let mut config = create_test_config(); + config.channels = vec![]; + + let result = config.validate(); + assert!(result.is_err()); + assert!( + result + .unwrap_err() + .to_string() + .contains("At least one channel") + ); + } + + #[test] + fn test_config_validation_zero_connection_timeout() { + let mut config = create_test_config(); + config.connection_timeout_secs = 0; + + let result = config.validate(); + assert!(result.is_err()); + assert!( + result + .unwrap_err() + .to_string() + .contains("Connection timeout must be between") + ); + } + + #[test] + fn test_config_validation_zero_event_timeout() { + let mut config = create_test_config(); + config.event_timeout_ms = 0; + + let result = config.validate(); + assert!(result.is_err()); + assert!( + result + .unwrap_err() + .to_string() + .contains("Event timeout must be between") + ); + } + + #[test] + fn test_config_validation_zero_batch_size() { + let mut config = create_test_config(); + config.batch_size = 0; + + let result = config.validate(); + assert!(result.is_err()); + assert!( + result + .unwrap_err() + .to_string() + .contains("Batch size must be between 1 and") + ); + } + + #[test] + fn test_config_validation_empty_channel_name() { + let mut config = create_test_config(); + config.channels = vec!["System".to_string(), "".to_string()]; + + let result = config.validate(); + assert!(result.is_err()); + assert!( + result + .unwrap_err() + .to_string() + .contains("Channel names cannot be empty") + ); + } + + #[test] + fn test_config_validation_empty_query() { + let mut config = create_test_config(); + config.event_query = Some("".to_string()); + + let result = config.validate(); + assert!(result.is_err()); + assert!( + result + .unwrap_err() + .to_string() + .contains("Event query cannot be empty") + ); + } + + #[test] + fn 
test_config_serialization() { + let config = create_test_config(); + + let serialized = serde_json::to_string(&config).unwrap(); + let deserialized: WindowsEventLogConfig = serde_json::from_str(&serialized).unwrap(); + + assert_eq!(config.channels, deserialized.channels); + assert_eq!( + config.connection_timeout_secs, + deserialized.connection_timeout_secs + ); + assert_eq!(config.event_timeout_ms, deserialized.event_timeout_ms); + assert_eq!(config.batch_size, deserialized.batch_size); + } + + #[test] + fn test_field_filter_configuration() { + let mut config = create_test_config(); + config.field_filter = FieldFilter { + include_fields: Some(vec!["event_id".to_string(), "level".to_string()]), + exclude_fields: Some(vec!["raw_xml".to_string()]), + include_system_fields: false, + include_event_data: true, + include_user_data: false, + }; + + assert!(config.validate().is_ok()); + assert!(!config.field_filter.include_system_fields); + assert!(config.field_filter.include_event_data); + assert!(!config.field_filter.include_user_data); + } + + #[test] + fn test_event_data_format_configuration() { + let mut config = create_test_config(); + config + .event_data_format + .insert("event_id".to_string(), EventDataFormat::String); + config + .event_data_format + .insert("process_id".to_string(), EventDataFormat::Integer); + config + .event_data_format + .insert("enabled".to_string(), EventDataFormat::Boolean); + + assert!(config.validate().is_ok()); + assert_eq!(config.event_data_format.len(), 3); + } + + #[test] + fn test_filtering_options() { + let mut config = create_test_config(); + config.ignore_event_ids = vec![4624, 4634]; + config.only_event_ids = Some(vec![1000, 1001, 1002]); + config.max_event_age_secs = Some(86400); + + assert!(config.validate().is_ok()); + assert_eq!(config.ignore_event_ids.len(), 2); + assert!(config.only_event_ids.is_some()); + assert_eq!(config.max_event_age_secs, Some(86400)); + } +} + +#[cfg(test)] +mod parser_tests { + use super::*; + + 
#[test]
+    fn test_parser_creation() {
+        let config = create_test_config();
+        let _parser = EventLogParser::new(&config, LogNamespace::Legacy);
+
+        // Should create without error - parser creation succeeds
+        // Note: Cannot test private fields directly
+    }
+
+    #[test]
+    fn test_parse_basic_event() {
+        let config = create_test_config();
+        let parser = EventLogParser::new(&config, LogNamespace::Legacy);
+        let event = create_test_event();
+
+        let log_event = parser.parse_event(event.clone()).unwrap();
+
+        // Check core fields
+        assert_eq!(log_event.get("event_id"), Some(&Value::Integer(4624)));
+        assert_eq!(log_event.get("record_id"), Some(&Value::Integer(12345)));
+        assert_eq!(
+            log_event.get("level"),
+            Some(&Value::Bytes("Information".into()))
+        );
+        assert_eq!(log_event.get("level_value"), Some(&Value::Integer(4)));
+        assert_eq!(
+            log_event.get("channel"),
+            Some(&Value::Bytes("Security".into()))
+        );
+        assert_eq!(
+            log_event.get("provider_name"),
+            Some(&Value::Bytes("Microsoft-Windows-Security-Auditing".into()))
+        );
+        assert_eq!(
+            log_event.get("computer"),
+            Some(&Value::Bytes("WIN-SERVER-01".into()))
+        );
+        assert_eq!(log_event.get("process_id"), Some(&Value::Integer(716)));
+        assert_eq!(log_event.get("thread_id"), Some(&Value::Integer(796)));
+    }
+
+    #[test]
+    fn test_parse_event_with_xml() {
+        let mut config = create_test_config();
+        config.include_xml = true;
+
+        let parser = EventLogParser::new(&config, LogNamespace::Legacy);
+        let event = create_test_event();
+
+        let log_event = parser.parse_event(event.clone()).unwrap();
+
+        // XML should be included
+        assert!(log_event.get("xml").is_some());
+        if let Some(Value::Bytes(xml_bytes)) = log_event.get("xml") {
+            let xml_string = String::from_utf8_lossy(xml_bytes);
+            assert!(xml_string.contains("<EventID>4624<"));
+        }
+    }
+
+    #[test]
+    fn test_parse_event_with_event_data() {
+        let mut config = create_test_config();
+        config.field_filter.include_event_data = true;
+
+        let parser = EventLogParser::new(&config, 
LogNamespace::Legacy); + let event = create_test_event(); + + let log_event = parser.parse_event(event.clone()).unwrap(); + + // Event data should be included + if let Some(Value::Object(event_data)) = log_event.get("event_data") { + assert_eq!( + event_data.get("TargetUserName"), + Some(&Value::Bytes("admin".into())) + ); + assert_eq!(event_data.get("LogonType"), Some(&Value::Bytes("2".into()))); + } else { + panic!("event_data should be present and be an object"); + } + } + + #[test] + fn test_parse_event_with_custom_formatting() { + let mut config = create_test_config(); + config + .event_data_format + .insert("event_id".to_string(), EventDataFormat::String); + config + .event_data_format + .insert("process_id".to_string(), EventDataFormat::Float); + + let parser = EventLogParser::new(&config, LogNamespace::Legacy); + let event = create_test_event(); + + let log_event = parser.parse_event(event.clone()).unwrap(); + + // event_id should be formatted as string + assert_eq!( + log_event.get("event_id"), + Some(&Value::Bytes("4624".into())) + ); + + // process_id should be formatted as float + if let Some(Value::Float(process_id)) = log_event.get("process_id") { + assert_eq!(process_id.into_inner(), 716.0); + } else { + panic!("process_id should be formatted as float"); + } + } + + #[test] + fn test_windows_event_level_names() { + let mut event = create_test_event(); + + // Level 0 (LogAlways / Security audit) maps to "Information" + event.level = 0; + assert_eq!(event.level_name(), "Information"); + + event.level = 1; + assert_eq!(event.level_name(), "Critical"); + + event.level = 2; + assert_eq!(event.level_name(), "Error"); + + event.level = 3; + assert_eq!(event.level_name(), "Warning"); + + event.level = 4; + assert_eq!(event.level_name(), "Information"); + + event.level = 5; + assert_eq!(event.level_name(), "Verbose"); + + event.level = 99; + assert_eq!(event.level_name(), "Unknown"); + } +} + +#[cfg(test)] +mod error_tests { + use super::*; + + #[test] + fn 
test_error_recoverability() { + // Recoverable errors + let recoverable_errors = vec![ + WindowsEventLogError::TimeoutError { timeout_secs: 30 }, + WindowsEventLogError::ResourceExhaustedError { + message: "test".to_string(), + }, + WindowsEventLogError::IoError { + source: std::io::Error::new(std::io::ErrorKind::TimedOut, "timeout"), + }, + ]; + + for error in recoverable_errors { + assert!( + error.is_recoverable(), + "Error should be recoverable: {}", + error + ); + } + + // Non-recoverable errors + let non_recoverable_errors = vec![ + WindowsEventLogError::AccessDeniedError { + channel: "Security".to_string(), + }, + WindowsEventLogError::ChannelNotFoundError { + channel: "NonExistent".to_string(), + }, + WindowsEventLogError::InvalidXPathQuery { + query: "invalid".to_string(), + message: "syntax error".to_string(), + }, + WindowsEventLogError::ConfigError { + message: "invalid config".to_string(), + }, + ]; + + for error in non_recoverable_errors { + assert!( + !error.is_recoverable(), + "Error should not be recoverable: {}", + error + ); + } + } + + #[test] + fn test_error_user_messages() { + let error = WindowsEventLogError::AccessDeniedError { + channel: "Security".to_string(), + }; + let message = error.user_message(); + assert!(message.contains("Access denied")); + assert!(message.contains("Administrator")); + + let error = WindowsEventLogError::ChannelNotFoundError { + channel: "NonExistent".to_string(), + }; + let message = error.user_message(); + assert!(message.contains("not found")); + assert!(message.contains("NonExistent")); + + let error = WindowsEventLogError::InvalidXPathQuery { + query: "*[invalid]".to_string(), + message: "syntax error".to_string(), + }; + let message = error.user_message(); + assert!(message.contains("Invalid XPath query")); + assert!(message.contains("*[invalid]")); + } + + #[test] + fn test_error_conversions() { + // Test conversion from quick_xml::Error + let xml_error = quick_xml::Error::UnexpectedEof("test".to_string()); 
+ let converted: WindowsEventLogError = xml_error.into(); + assert!(matches!( + converted, + WindowsEventLogError::ParseXmlError { .. } + )); + + // Test conversion from std::io::Error + let io_error = std::io::Error::new(std::io::ErrorKind::PermissionDenied, "test"); + let converted: WindowsEventLogError = io_error.into(); + assert!(matches!(converted, WindowsEventLogError::IoError { .. })); + } +} + +#[cfg(test)] +mod subscription_tests { + use super::super::subscription::build_xpath_query; + use super::*; + + // Note: test_not_supported_error is in subscription.rs to avoid duplication + + #[test] + fn test_build_xpath_query_default_wildcard() { + let config = create_test_config(); + let query = build_xpath_query(&config).unwrap(); + assert_eq!( + query, "*", + "Default config with no event_query and no only_event_ids should return wildcard" + ); + } + + #[test] + fn test_build_xpath_query_explicit_event_query_takes_precedence() { + let mut config = create_test_config(); + config.event_query = Some("*[System[Provider[@Name='MyApp']]]".to_string()); + config.only_event_ids = Some(vec![4624, 4625]); + + let query = build_xpath_query(&config).unwrap(); + assert_eq!( + query, "*[System[Provider[@Name='MyApp']]]", + "Explicit event_query should take precedence over only_event_ids" + ); + } + + #[test] + fn test_build_xpath_query_single_event_id() { + let mut config = create_test_config(); + config.only_event_ids = Some(vec![4624]); + + let query = build_xpath_query(&config).unwrap(); + assert_eq!(query, "*[System[EventID=4624]]"); + } + + #[test] + fn test_build_xpath_query_multiple_event_ids() { + let mut config = create_test_config(); + config.only_event_ids = Some(vec![4624, 4625, 4634]); + + let query = build_xpath_query(&config).unwrap(); + assert_eq!( + query, + "*[System[EventID=4624 or EventID=4625 or EventID=4634]]" + ); + } + + #[test] + fn test_build_xpath_query_empty_only_event_ids_returns_wildcard() { + let mut config = create_test_config(); + 
config.only_event_ids = Some(vec![]); + + let query = build_xpath_query(&config).unwrap(); + assert_eq!( + query, "*", + "Empty only_event_ids list should return wildcard" + ); + } + + #[test] + fn test_build_xpath_query_large_list_falls_back_to_wildcard() { + let mut config = create_test_config(); + // Generate enough IDs to exceed 4096-char XPath limit. + // Each "EventID=NNNNN" is ~12 chars, " or " is 4, so ~16 per ID. + // 4096 / 16 ≈ 256, so 300 IDs should exceed the limit. + config.only_event_ids = Some((10000..10300).collect()); + + let query = build_xpath_query(&config).unwrap(); + assert_eq!( + query, "*", + "Large ID list exceeding 4096 chars should fall back to wildcard" + ); + } + + #[test] + fn test_build_xpath_query_moderate_list_generates_xpath() { + let mut config = create_test_config(); + // 10 IDs should comfortably fit within 4096 chars. + config.only_event_ids = Some(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10]); + + let query = build_xpath_query(&config).unwrap(); + assert!( + query.starts_with("*[System["), + "Query should be XPath, got: {query}" + ); + assert!( + query.contains("EventID=1"), + "Query should contain EventID=1" + ); + assert!( + query.contains("EventID=10"), + "Query should contain EventID=10" + ); + assert!(query.len() <= 4096, "Query should fit within XPath limit"); + } + + #[test] + fn test_event_filtering_by_id() { + let mut config = create_test_config(); + config.ignore_event_ids = vec![4624, 4625]; + config.only_event_ids = Some(vec![1000, 1001]); + + // Configuration should be valid + assert!(config.validate().is_ok()); + + // Test event should be filtered out (4624 is in ignore list) + let event = create_test_event(); // event_id = 4624 + assert!(config.ignore_event_ids.contains(&event.event_id)); + + // Test only_event_ids filtering + if let Some(ref only_ids) = config.only_event_ids { + assert!(!only_ids.contains(&event.event_id)); + } + } + + #[test] + fn test_only_and_ignore_event_ids_interaction() { + // When both filters 
are set, only_event_ids narrows first, + // then ignore_event_ids can further exclude from that set. + let mut config = create_test_config(); + config.only_event_ids = Some(vec![1000, 1001, 1002]); + config.ignore_event_ids = vec![1001]; + + assert!(config.validate().is_ok()); + + // 1000 passes only_event_ids and is not in ignore list → accepted + assert!(config.only_event_ids.as_ref().unwrap().contains(&1000)); + assert!(!config.ignore_event_ids.contains(&1000)); + + // 1001 passes only_event_ids but is in ignore list → rejected + assert!(config.only_event_ids.as_ref().unwrap().contains(&1001)); + assert!(config.ignore_event_ids.contains(&1001)); + + // 9999 fails only_event_ids → rejected before ignore check + assert!(!config.only_event_ids.as_ref().unwrap().contains(&9999)); + } + + #[test] + fn test_only_event_ids_with_max_event_age() { + let mut config = create_test_config(); + config.only_event_ids = Some(vec![4624, 4625]); + config.max_event_age_secs = Some(3600); + + assert!(config.validate().is_ok()); + + // Both filters should be set independently + assert_eq!(config.only_event_ids.as_ref().unwrap().len(), 2); + assert_eq!(config.max_event_age_secs, Some(3600)); + } + + #[test] + fn test_build_xpath_query_with_ignore_event_ids_only() { + // ignore_event_ids does NOT generate XPath — it's handled in-process + // because XPath has no "NOT EventID=X" syntax. 
+ let mut config = create_test_config(); + config.ignore_event_ids = vec![4624, 4625]; + + let query = build_xpath_query(&config).unwrap(); + assert_eq!( + query, "*", + "ignore_event_ids alone should not generate XPath filter" + ); + } + + #[test] + fn test_event_age_filtering() { + let mut config = create_test_config(); + config.max_event_age_secs = Some(86400); // 24 hours + + let mut event = create_test_event(); + + // Event from now should pass + event.time_created = Utc::now(); + let age = Utc::now().signed_duration_since(event.time_created); + assert!(age.num_seconds() <= 86400); + + // Event from 2 days ago should be filtered + event.time_created = Utc::now() - chrono::Duration::days(2); + let age = Utc::now().signed_duration_since(event.time_created); + assert!(age.num_seconds() > 86400); + } + + #[test] + fn test_xml_parsing_helpers() { + let xml = r#" + + + + 1 + 4 + 12345 + System + TEST-MACHINE + + + "#; + + assert_eq!(extract_xml_value(xml, "EventID"), Some("1".to_string())); + assert_eq!(extract_xml_value(xml, "Level"), Some("4".to_string())); + assert_eq!( + extract_xml_value(xml, "EventRecordID"), + Some("12345".to_string()) + ); + assert_eq!( + extract_xml_value(xml, "Channel"), + Some("System".to_string()) + ); + assert_eq!( + extract_xml_value(xml, "Computer"), + Some("TEST-MACHINE".to_string()) + ); + assert_eq!(extract_xml_value(xml, "NonExistent"), None); + } + + #[test] + fn test_xml_attribute_parsing() { + let xml = r#" + + + + + + + "#; + + assert_eq!( + extract_xml_attribute(xml, "Name"), + Some("Microsoft-Windows-Kernel-General".to_string()) + ); + assert_eq!( + extract_xml_attribute(xml, "SystemTime"), + Some("2025-08-29T00:15:41.123456Z".to_string()) + ); + assert_eq!(extract_xml_attribute(xml, "NonExistent"), None); + } + + #[test] + fn test_event_data_extraction() { + let xml = r#" + + + administrator + 0x3e7 + 2 + WIN-TEST + + + "#; + + let config = WindowsEventLogConfig::default(); + let event_data = extract_event_data(xml, 
&config); + + assert_eq!( + event_data.structured_data.get("TargetUserName"), + Some(&"administrator".to_string()) + ); + assert_eq!( + event_data.structured_data.get("TargetLogonId"), + Some(&"0x3e7".to_string()) + ); + assert_eq!( + event_data.structured_data.get("LogonType"), + Some(&"2".to_string()) + ); + assert_eq!( + event_data.structured_data.get("WorkstationName"), + Some(&"WIN-TEST".to_string()) + ); + } + + #[test] + fn test_security_limits() { + // Test XML element extraction with size limits + let large_xml = format!( + r#" + + + {} + + + "#, + "x".repeat(10000) + ); // Very large content + + // Should not panic or consume excessive memory + let result = extract_xml_value(&large_xml, "EventID"); + // Should either truncate or return None, but not crash + match result { + Some(value) => assert!(value.len() <= 4096, "Should limit extracted text size"), + None => {} // Acceptable if parsing fails due to size limits + } + } +} + +#[tokio::test] +async fn test_source_output_schema() { + let config = create_test_config(); + + // Test legacy namespace + let outputs = config.outputs(LogNamespace::Legacy); + assert_eq!(outputs.len(), 1); + + // Test vector namespace + let outputs = config.outputs(LogNamespace::Vector); + assert_eq!(outputs.len(), 1); +} + +#[tokio::test] +async fn test_source_resources() { + let config = create_test_config(); + let resources = config.resources(); + + assert_eq!(resources.len(), 2); + assert!(resources.iter().any(|r| r.to_string().contains("System"))); + assert!( + resources + .iter() + .any(|r| r.to_string().contains("Application")) + ); +} + +#[tokio::test] +async fn test_source_acknowledgements() { + let config = create_test_config(); + + // Windows Event Log source supports acknowledgements + assert!(config.can_acknowledge()); +} + +// Compliance tests +#[tokio::test] +async fn test_source_compliance() { + let data_dir = tempfile::tempdir().expect("failed to create temp data_dir"); + let mut config = create_test_config(); + 
config.data_dir = Some(data_dir.path().to_path_buf()); + run_and_assert_source_compliance(config, Duration::from_millis(100), &SOURCE_TAGS).await; +} + +// ================================================================================================ +// SECURITY TESTS - Critical security attack vector validation +// ================================================================================================ + +#[cfg(test)] +mod security_tests { + use super::*; + + /// Test XPath injection attack prevention + #[test] + fn test_xpath_injection_prevention() { + let mut config = create_test_config(); + + // Test JavaScript injection attempts + let javascript_attacks = vec![ + "javascript:alert('xss')", + "*[javascript:eval('malicious')]", + "System[javascript:document.write('attack')]", + "*[System[javascript:window.open()]]", + ]; + + for attack in javascript_attacks { + config.event_query = Some(attack.to_string()); + let result = config.validate(); + assert!( + result.is_err(), + "JavaScript injection '{}' should be blocked", + attack + ); + assert!( + result + .unwrap_err() + .to_string() + .contains("potentially unsafe pattern"), + "Error should mention unsafe pattern for: {}", + attack + ); + } + + // Test valid XPath queries should still work + let valid_queries = vec![ + "*[System[Level=1 or Level=2]]", + "*[System[(Level=1 or Level=2) and TimeCreated[timediff(@SystemTime) <= 86400000]]]", + "*[System[Provider[@Name='Microsoft-Windows-Security-Auditing']]]", + "Event/System[EventID=4624]", + ]; + + for valid_query in valid_queries { + config.event_query = Some(valid_query.to_string()); + let result = config.validate(); + assert!( + result.is_ok(), + "Valid XPath query '{}' should be allowed", + valid_query + ); + } + } + + /// Test resource exhaustion attack prevention + #[test] + fn test_resource_exhaustion_prevention() { + let mut config = create_test_config(); + + // Test excessive connection timeout (DoS prevention) + config.connection_timeout_secs = 
0; + assert!( + config.validate().is_err(), + "Zero connection timeout should be rejected" + ); + + config.connection_timeout_secs = u64::MAX; + assert!( + config.validate().is_err(), + "Excessive connection timeout should be rejected" + ); + + config.connection_timeout_secs = 7200; // 2 hours + assert!( + config.validate().is_err(), + "Connection timeout > 3600 seconds should be rejected" + ); + + // Test excessive event timeout + config.connection_timeout_secs = 30; // Reset to valid value + config.event_timeout_ms = 0; + assert!( + config.validate().is_err(), + "Zero event timeout should be rejected" + ); + + config.event_timeout_ms = 100000; // 100 seconds + assert!( + config.validate().is_err(), + "Excessive event timeout should be rejected" + ); + + // Test excessive batch sizes (memory exhaustion prevention) + config.event_timeout_ms = 5000; // Reset to valid value + config.batch_size = 0; + assert!( + config.validate().is_err(), + "Zero batch size should be rejected" + ); + + config.batch_size = 100000; + assert!( + config.validate().is_err(), + "Excessive batch size should be rejected" + ); + } + + /// Test channel name validation (injection prevention) + #[test] + fn test_channel_name_security_validation() { + let mut config = create_test_config(); + + // Test dangerous channel names that config validation actually rejects: + // empty/whitespace, control characters (null, CRLF), and excessive length. + // Note: HTML tags, SQL fragments, and shell metacharacters are not rejected + // at config validation time — the Windows API handles those at subscription. 
+ let excessive_length = "A".repeat(300); + let dangerous_channels = vec![ + "", // Empty channel + " ", // Whitespace only + "System\0", // Null byte injection + "System\r\nmalicious", // CRLF injection + &excessive_length, // Excessive length + ]; + + for dangerous_channel in &dangerous_channels { + config.channels = vec!["System".to_string(), dangerous_channel.to_string()]; + let result = config.validate(); + assert!( + result.is_err(), + "Dangerous channel name '{}' should be rejected", + dangerous_channel.escape_debug() + ); + } + + // Test valid channel names should work + let valid_channels = vec![ + "System", + "Application", + "Security", + "Windows PowerShell", + "Microsoft-Windows-Security-Auditing/Operational", + "Custom-Application_Log", + "Service-Name/Admin", + "Application and Services Logs/Custom", + ]; + + for valid_channel in valid_channels { + config.channels = vec!["System".to_string(), valid_channel.to_string()]; + let result = config.validate(); + assert!( + result.is_ok(), + "Valid channel name '{}' should be allowed", + valid_channel + ); + } + } + + /// Test excessive query length prevention + #[test] + fn test_excessive_query_length_prevention() { + let mut config = create_test_config(); + + // Test query length limits + let long_query = "*[System[".to_string() + &"Level=1 and ".repeat(1000) + "Level=2]]"; + config.event_query = Some(long_query); + let result = config.validate(); + assert!(result.is_err(), "Excessively long query should be rejected"); + assert!( + result + .unwrap_err() + .to_string() + .contains("exceeds maximum length"), + "Error should mention length limit" + ); + + // Test reasonable query length should work + let reasonable_query = "*[System[Level=1 or Level=2 or Level=3]]".to_string(); + config.event_query = Some(reasonable_query); + assert!( + config.validate().is_ok(), + "Reasonable length query should be allowed" + ); + } +} + +// 
================================================================================================ +// BUFFER OVERFLOW AND MEMORY SAFETY TESTS +// ================================================================================================ + +#[cfg(test)] +mod buffer_safety_tests { + use super::*; + + /// Test XML parsing with malicious buffer sizes + #[test] + fn test_malformed_xml_buffer_safety() { + // Test extremely large XML documents (should be handled gracefully) + let large_xml = format!( + "{}", + "value".repeat(1000) // Reduced from 10000 for memory safety + ); + + // This should not panic or cause memory issues + let config = WindowsEventLogConfig::default(); + let result = extract_event_data(&large_xml, &config); + + // Should have some reasonable limit on parsed data + assert!( + result.structured_data.len() <= 100, + "Should limit parsed data size to prevent DoS" + ); + } + + /// Test XML parsing with deeply nested structures + #[test] + fn test_deeply_nested_xml_protection() { + // Create deeply nested XML structure (reduced nesting for memory safety) + let mut nested_xml = "".to_string(); + for i in 0..100 { + // Reduced from 1000 + nested_xml.push_str(&format!("", i)); + } + nested_xml.push_str("value"); + for i in (0..100).rev() { + nested_xml.push_str(&format!("", i)); + } + nested_xml.push_str(""); + + // This should not cause stack overflow or excessive memory usage + let config = WindowsEventLogConfig::default(); + let result = extract_event_data(&nested_xml, &config); + + // Should handle gracefully - either succeeds or fails safely + // The key is that it doesn't crash or consume excessive resources + assert!( + result.structured_data.len() <= 100, + "Should limit parsed data for deeply nested XML" + ); + } + + /// Test handling of XML with excessive attributes + #[test] + fn test_excessive_xml_attributes_handling() { + // Create XML with many attributes (reduced count for safety) + let mut xml_with_attrs = "".to_string(); + for i in 
0..200 { + // Reduced from 5000 + xml_with_attrs.push_str(&format!( + "data{}", + i, i, i + )); + } + xml_with_attrs.push_str(""); + + // Should handle gracefully without memory exhaustion + let config = WindowsEventLogConfig::default(); + let result = extract_event_data(&xml_with_attrs, &config); + + // Should parse without panicking or memory exhaustion. + // extract_event_data does not impose an attribute count cap; + // it parses all well-formed Data elements present in the XML. + assert!( + result.structured_data.len() <= 200, + "Should parse attributes without memory issues" + ); + } +} + +// ================================================================================================ +// CONCURRENCY AND RACE CONDITION TESTS +// ================================================================================================ + +// ================================================================================================ +// ERROR INJECTION AND FAULT TOLERANCE TESTS +// ================================================================================================ + +#[cfg(test)] +mod fault_tolerance_tests { + use super::*; + + #[tokio::test] + async fn test_invalid_xml_handling() { + let invalid_xml = "not valid xml "; + let config = WindowsEventLogConfig::default(); + let result = extract_event_data(invalid_xml, &config); + // Should return empty result or handle gracefully without crashing + assert!( + result.structured_data.len() == 0, + "Invalid XML should result in empty data" + ); + } + + #[tokio::test] + async fn test_malicious_xml_handling() { + // Test various malicious XML patterns + let malicious_xmls = vec![ + "]>&xxe;".to_string(), + format!("", "x".repeat(100000)), // Large CDATA + format!("{}data{}", "".repeat(1000), "".repeat(1000)), // Deep nesting + ]; + + let config = WindowsEventLogConfig::default(); + for malicious_xml in &malicious_xmls { + let result = extract_event_data(&malicious_xml, &config); + // Should handle without 
crashing or excessive resource usage + assert!( + result.structured_data.len() <= 100, + "Malicious XML should be limited in processing" + ); + } + } +} + +// ================================================================================================ +// ACKNOWLEDGMENT TESTS +// ================================================================================================ + +#[cfg(test)] +mod acknowledgement_tests { + use super::*; + use crate::config::{SourceAcknowledgementsConfig, SourceConfig}; + + #[test] + fn test_acknowledgements_config_default_disabled() { + let config = WindowsEventLogConfig::default(); + // Acknowledgements should be disabled by default + assert!( + !config.acknowledgements.enabled(), + "Acknowledgements should be disabled by default" + ); + } + + #[test] + fn test_acknowledgements_config_enabled() { + let mut config = create_test_config(); + config.acknowledgements = SourceAcknowledgementsConfig::from(true); + assert!( + config.acknowledgements.enabled(), + "Acknowledgements should be enabled when configured" + ); + } + + #[test] + fn test_can_acknowledge_returns_true() { + let config = WindowsEventLogConfig::default(); + assert!( + config.can_acknowledge(), + "can_acknowledge() should return true to support acknowledgements" + ); + } + + #[test] + fn test_acknowledgements_config_serialization() { + // Test that acknowledgements config serializes correctly + let config = WindowsEventLogConfig { + acknowledgements: SourceAcknowledgementsConfig::from(true), + ..Default::default() + }; + + let serialized = serde_json::to_string(&config).expect("serialization should succeed"); + assert!( + serialized.contains("acknowledgements"), + "Serialized config should contain acknowledgements field" + ); + + // Test deserialization + let deserialized: WindowsEventLogConfig = + serde_json::from_str(&serialized).expect("deserialization should succeed"); + assert!( + deserialized.acknowledgements.enabled(), + "Acknowledgements should be enabled 
after deserialization" + ); + } + + #[test] + fn test_acknowledgements_toml_parsing() { + // Test parsing from TOML with acknowledgements enabled + let toml_with_acks = r#" + channels = ["System"] + acknowledgements = true + "#; + let config: WindowsEventLogConfig = + toml::from_str(toml_with_acks).expect("TOML parsing should succeed"); + assert!( + config.acknowledgements.enabled(), + "Acknowledgements should be enabled from TOML" + ); + + // Test parsing with acknowledgements as struct + let toml_with_acks_struct = r#" + channels = ["System"] + [acknowledgements] + enabled = true + "#; + let config: WindowsEventLogConfig = + toml::from_str(toml_with_acks_struct).expect("TOML parsing should succeed"); + assert!( + config.acknowledgements.enabled(), + "Acknowledgements should be enabled from TOML struct" + ); + + // Test parsing without acknowledgements (default) + let toml_without_acks = r#" + channels = ["System"] + "#; + let config: WindowsEventLogConfig = + toml::from_str(toml_without_acks).expect("TOML parsing should succeed"); + assert!( + !config.acknowledgements.enabled(), + "Acknowledgements should be disabled by default" + ); + } +} + +// ================================================================================================ +// RATE LIMITING TESTS +// ================================================================================================ + +#[cfg(test)] +mod rate_limiting_tests { + use super::*; + + #[test] + fn test_rate_limiting_config_default_disabled() { + let config = WindowsEventLogConfig::default(); + assert_eq!( + config.events_per_second, 0, + "Rate limiting should be disabled by default (0)" + ); + } + + #[test] + fn test_rate_limiting_config_enabled() { + let mut config = create_test_config(); + config.events_per_second = 100; + assert!( + config.validate().is_ok(), + "Rate limiting config should be valid" + ); + assert_eq!(config.events_per_second, 100); + } + + #[test] + fn test_rate_limiting_toml_parsing() { + let 
toml_with_rate_limit = r#" + channels = ["System"] + events_per_second = 50 + "#; + let config: WindowsEventLogConfig = + toml::from_str(toml_with_rate_limit).expect("TOML parsing should succeed"); + assert_eq!( + config.events_per_second, 50, + "Rate limiting should be parsed from TOML" + ); + } + + #[test] + fn test_rate_limiting_serialization() { + let mut config = create_test_config(); + config.events_per_second = 100; + + let serialized = serde_json::to_string(&config).expect("serialization should succeed"); + assert!( + serialized.contains("events_per_second"), + "Serialized config should contain events_per_second" + ); + + let deserialized: WindowsEventLogConfig = + serde_json::from_str(&serialized).expect("deserialization should succeed"); + assert_eq!( + deserialized.events_per_second, 100, + "events_per_second should be preserved after serialization" + ); + } +} + +// ================================================================================================ +// CHECKPOINT TESTS +// ================================================================================================ + +#[cfg(test)] +mod checkpoint_tests { + use super::*; + + #[test] + fn test_checkpoint_data_dir_config() { + let mut config = create_test_config(); + config.data_dir = Some(std::path::PathBuf::from("/tmp/vector-test")); + assert!( + config.validate().is_ok(), + "Config with data_dir should be valid" + ); + } + + #[test] + fn test_checkpoint_toml_parsing() { + let toml_with_data_dir = r#" + channels = ["System"] + data_dir = "/var/lib/vector/wineventlog" + "#; + let config: WindowsEventLogConfig = + toml::from_str(toml_with_data_dir).expect("TOML parsing should succeed"); + assert!( + config.data_dir.is_some(), + "data_dir should be parsed from TOML" + ); + } + + #[test] + fn test_checkpoint_path_construction() { + // Verify that the checkpoint module exists and can be used + let _ = std::mem::size_of::(); + // The actual file operations would require Windows, so we only 
validate type availability. + } +} + +// ================================================================================================ +// MESSAGE RENDERING TESTS +// ================================================================================================ + +#[cfg(test)] +mod message_rendering_tests { + use super::*; + + #[test] + fn test_render_message_config_default() { + let config = WindowsEventLogConfig::default(); + assert!( + config.render_message, + "render_message should be enabled by default for compatibility with Event Viewer" + ); + } + + #[test] + fn test_render_message_config_enabled() { + let toml_with_render = r#" + channels = ["System"] + render_message = true + "#; + let config: WindowsEventLogConfig = + toml::from_str(toml_with_render).expect("TOML parsing should succeed"); + assert!( + config.render_message, + "render_message should be enabled from TOML" + ); + } + + #[test] + fn test_render_message_false_uses_fallback() { + // When render_message is false, the parser should use fallback message + let config = WindowsEventLogConfig { + render_message: false, + ..Default::default() + }; + let parser = EventLogParser::new(&config, LogNamespace::Legacy); + + // Create event without rendered_message + let mut event = create_test_event(); + event.rendered_message = None; + event.event_data.clear(); // No message in event_data either + event.string_inserts.clear(); // Clear string inserts to reach fallback path + + let log_event = parser.parse_event(event.clone()).unwrap(); + + // Should have fallback message format: "Event ID X from Provider on Computer" + if let Some(message) = log_event.get("message") { + let msg_str = message.to_string_lossy(); + assert!( + msg_str.contains("Event ID") || msg_str.contains(&event.event_id.to_string()), + "Fallback message should contain Event ID: got '{}'", + msg_str + ); + } + } + + #[test] + fn test_render_message_true_uses_rendered() { + // When render_message is true and rendered_message is 
available, use it + let config = WindowsEventLogConfig { + render_message: true, + ..Default::default() + }; + let parser = EventLogParser::new(&config, LogNamespace::Legacy); + + // Create event with rendered_message + let mut event = create_test_event(); + event.rendered_message = Some("The service started successfully.".to_string()); + + let log_event = parser.parse_event(event).unwrap(); + + if let Some(message) = log_event.get("message") { + let msg_str = message.to_string_lossy(); + assert_eq!( + msg_str, "The service started successfully.", + "Should use rendered_message when available" + ); + } + } + + #[test] + fn test_render_message_serialization() { + let mut config = create_test_config(); + config.render_message = true; + + let serialized = serde_json::to_string(&config).expect("serialization should succeed"); + assert!( + serialized.contains("render_message"), + "Serialized config should contain render_message" + ); + + let deserialized: WindowsEventLogConfig = + serde_json::from_str(&serialized).expect("deserialization should succeed"); + assert!( + deserialized.render_message, + "render_message should be preserved after serialization" + ); + } +} + +// ================================================================================================ +// TRUNCATION TESTS +// ================================================================================================ + +#[cfg(test)] +mod truncation_tests { + use super::*; + + #[test] + fn test_max_event_data_length_config() { + let mut config = create_test_config(); + config.max_event_data_length = 100; + assert!( + config.validate().is_ok(), + "Config with max_event_data_length should be valid" + ); + } + + #[test] + fn test_max_event_data_length_toml_parsing() { + let toml_with_truncation = r#" + channels = ["System"] + max_event_data_length = 256 + "#; + let config: WindowsEventLogConfig = + toml::from_str(toml_with_truncation).expect("TOML parsing should succeed"); + assert_eq!( + 
config.max_event_data_length, 256, + "max_event_data_length should be parsed from TOML" + ); + } + + #[test] + fn test_truncation_marker_format() { + // max_event_data_length applies to event_data/user_data values, + // not to string_inserts which are passed through verbatim. + // Verify string_inserts are preserved at full length. + let config = WindowsEventLogConfig { + max_event_data_length: 50, + ..Default::default() + }; + + let mut event = create_test_event(); + event.string_inserts = vec!["A".repeat(200)]; + + let parser = EventLogParser::new(&config, LogNamespace::Legacy); + let log_event = parser.parse_event(event).unwrap(); + + let inserts = log_event + .get("string_inserts") + .expect("string_inserts should be present"); + if let Value::Array(arr) = inserts { + assert!(!arr.is_empty(), "string_inserts should not be empty"); + let first = arr[0].to_string_lossy(); + assert_eq!( + first.len(), + 200, + "string_inserts should be preserved at full length" + ); + } else { + panic!("string_inserts should be an array"); + } + } + + #[test] + fn test_xml_truncation_limit() { + // XML should be truncated at 32KB limit + let mut config = create_test_config(); + config.include_xml = true; + + let parser = EventLogParser::new(&config, LogNamespace::Legacy); + + // Create event with large XML + let mut event = create_test_event(); + event.raw_xml = "A".repeat(40000); // 40KB, exceeds limit + + let log_event = parser.parse_event(event).unwrap(); + + if let Some(Value::Bytes(xml)) = log_event.get("xml") { + // XML should be truncated or limited + assert!( + xml.len() <= 40000, + "XML should be handled without memory issues" + ); + } + } + + #[test] + fn test_config_validation_max_channels() { + let mut config = create_test_config(); + + // 63 channels should be fine (MAXIMUM_WAIT_OBJECTS - 1 for shutdown event) + config.channels = (0..63).map(|i| format!("Channel{i}")).collect(); + assert!(config.validate().is_ok(), "63 channels should be accepted"); + + // 64 channels 
should fail + config.channels = (0..64).map(|i| format!("Channel{i}")).collect(); + let result = config.validate(); + assert!(result.is_err(), "64 channels should be rejected"); + assert!( + result + .unwrap_err() + .to_string() + .contains("Too many channels"), + "Error should mention too many channels" + ); + } + + #[test] + fn test_config_validation_channel_name_at_max_length() { + let mut config = create_test_config(); + // 256 chars is exactly at the limit — should pass + config.channels = vec!["A".repeat(256)]; + assert!( + config.validate().is_ok(), + "256-char channel name should be accepted" + ); + + // 257 chars exceeds the limit — should fail + config.channels = vec!["A".repeat(257)]; + assert!( + config.validate().is_err(), + "257-char channel name should be rejected" + ); + } + + #[test] + fn test_config_validation_xpath_query_at_max_length() { + let mut config = create_test_config(); + // Exactly 4096 chars — should pass + let padded = format!("*{}", "x".repeat(4095)); + assert_eq!(padded.len(), 4096); + config.event_query = Some(padded); + assert!( + config.validate().is_ok(), + "4096-char XPath query should be accepted" + ); + + // 4097 chars — should fail + let padded = format!("*{}", "x".repeat(4096)); + assert_eq!(padded.len(), 4097); + config.event_query = Some(padded); + assert!( + config.validate().is_err(), + "4097-char XPath query should be rejected" + ); + } + + #[test] + fn test_config_validation_event_ids_at_max_size() { + let mut config = create_test_config(); + // 1000 IDs is exactly at the limit — should pass + config.only_event_ids = Some((1..=1000).collect()); + assert!( + config.validate().is_ok(), + "1000 event IDs should be accepted" + ); + + // 1001 IDs exceeds the limit — should fail + config.only_event_ids = Some((1..=1001).collect()); + assert!( + config.validate().is_err(), + "1001 event IDs should be rejected" + ); + } +} diff --git a/src/sources/windows_event_log/xml_parser.rs b/src/sources/windows_event_log/xml_parser.rs 
new file mode 100644 index 0000000000000..c3e073cb88d60 --- /dev/null +++ b/src/sources/windows_event_log/xml_parser.rs @@ -0,0 +1,1134 @@ +use std::collections::HashMap; + +use chrono::{DateTime, Utc}; +use metrics::counter; +use quick_xml::{Reader, events::Event as XmlEvent}; + +use super::config::WindowsEventLogConfig; +use super::error::*; + +/// Truncate a string at a UTF-8 safe boundary, appending a suffix. +pub(crate) fn truncate_utf8(s: &mut String, max_bytes: usize) { + if s.len() <= max_bytes { + return; + } + let mut end = max_bytes; + while end > 0 && !s.is_char_boundary(end) { + end -= 1; + } + s.truncate(end); + s.push_str("...[truncated]"); +} + +/// System fields extracted from Windows Event Log XML via single-pass parsing. +#[derive(Debug, Clone, Default)] +pub struct SystemFields { + pub event_id: u32, + pub level: u8, + pub task: u16, + pub opcode: u8, + pub keywords: u64, + pub version: Option, + pub qualifiers: Option, + pub record_id: u64, + pub activity_id: Option, + pub related_activity_id: Option, + pub process_id: u32, + pub thread_id: u32, + pub channel: String, + pub computer: String, + pub user_id: Option, + pub provider_name: String, + pub provider_guid: Option, + /// Raw timestamp string from TimeCreated/@SystemTime. + pub system_time: Option, +} + +/// Result from EventData parsing (supports both named and positional formats). +#[derive(Debug, Clone)] +pub struct EventDataResult { + pub structured_data: HashMap, + pub string_inserts: Vec, + pub user_data: HashMap, +} + +/// Represents a Windows Event Log event. 
+#[derive(Debug, Clone)] +pub struct WindowsEvent { + pub record_id: u64, + pub event_id: u32, + pub level: u8, + pub task: u16, + pub opcode: u8, + pub keywords: u64, + pub time_created: DateTime, + pub provider_name: String, + pub provider_guid: Option, + pub channel: String, + pub computer: String, + pub user_id: Option, + pub process_id: u32, + pub thread_id: u32, + pub activity_id: Option, + pub related_activity_id: Option, + pub raw_xml: String, + pub rendered_message: Option, + pub event_data: HashMap, + pub user_data: HashMap, + pub task_name: Option, + pub opcode_name: Option, + pub keyword_names: Vec, + /// Resolved account name from user_id SID (e.g. "NT AUTHORITY\SYSTEM"). + pub user_name: Option, + pub version: Option, + pub qualifiers: Option, + pub string_inserts: Vec, +} + +impl WindowsEvent { + /// Returns the human-readable level name for this event. + /// + /// Level 0 maps to "Information" per standard convention. Windows uses + /// Level=0 for "LogAlways" and for all Security audit events. Mapping it to + /// "Information" prevents SOC analysts from seeing "Unknown" on every logon event. + pub const fn level_name(&self) -> &'static str { + match self.level { + 0 => "Information", + 1 => "Critical", + 2 => "Error", + 3 => "Warning", + 4 => "Information", + 5 => "Verbose", + _ => "Unknown", + } + } +} + +/// Tracks which element's text content we are currently collecting. +#[derive(Clone, Copy, PartialEq, Eq)] +enum TextTarget { + None, + EventID, + Version, + Level, + Task, + Opcode, + Keywords, + EventRecordID, + Channel, + Computer, +} + +/// Parse the System section of Windows Event Log XML in a single pass. +/// +/// Replaces ~28 individual `extract_xml_value`/`extract_xml_attribute`/ +/// `extract_provider_name` calls with one `quick_xml::Reader` traversal. 
+pub fn parse_system_section(xml: &str) -> SystemFields { + let mut fields = SystemFields::default(); + let mut reader = Reader::from_str(xml); + reader.trim_text(true); + let mut buf = Vec::new(); + + let mut in_system = false; + let mut text_target = TextTarget::None; + let mut text_buf = String::new(); + + const MAX_ITERATIONS: usize = 2000; + let mut iterations = 0; + + loop { + if iterations >= MAX_ITERATIONS { + break; + } + iterations += 1; + + match reader.read_event_into(&mut buf) { + Ok(XmlEvent::Start(ref e)) => { + let local = e.name().local_name(); + let local = local.as_ref(); + + if local == b"System" { + in_system = true; + } else if in_system { + text_target = TextTarget::None; + text_buf.clear(); + + match local { + b"Provider" => extract_provider_attrs(e, &mut fields), + b"EventID" => { + extract_qualifiers_attr(e, &mut fields); + text_target = TextTarget::EventID; + } + b"Version" => text_target = TextTarget::Version, + b"Level" => text_target = TextTarget::Level, + b"Task" => text_target = TextTarget::Task, + b"Opcode" => text_target = TextTarget::Opcode, + b"Keywords" => text_target = TextTarget::Keywords, + b"TimeCreated" => extract_time_created_attr(e, &mut fields), + b"EventRecordID" => text_target = TextTarget::EventRecordID, + b"Correlation" => extract_correlation_attrs(e, &mut fields), + b"Execution" => extract_execution_attrs(e, &mut fields), + b"Channel" => text_target = TextTarget::Channel, + b"Computer" => text_target = TextTarget::Computer, + b"Security" => extract_security_attrs(e, &mut fields), + _ => {} + } + } + } + Ok(XmlEvent::Empty(ref e)) => { + if !in_system { + if e.name().local_name().as_ref() == b"System" { + // Empty — nothing to extract + break; + } + buf.clear(); + continue; + } + let local = e.name().local_name(); + let local = local.as_ref(); + match local { + b"Provider" => extract_provider_attrs(e, &mut fields), + b"TimeCreated" => extract_time_created_attr(e, &mut fields), + b"Correlation" => 
extract_correlation_attrs(e, &mut fields), + b"Execution" => extract_execution_attrs(e, &mut fields), + b"Security" => extract_security_attrs(e, &mut fields), + _ => {} + } + } + Ok(XmlEvent::Text(ref e)) => { + if in_system && text_target != TextTarget::None { + if let Ok(text) = e.unescape() { + if text_buf.len() + text.len() <= 4096 { + text_buf.push_str(&text); + } + } + } + } + Ok(XmlEvent::End(ref e)) => { + let local = e.name().local_name(); + let local = local.as_ref(); + if local == b"System" { + // Commit any pending text before exiting + commit_text(&text_target, &text_buf, &mut fields); + break; + } + if in_system && text_target != TextTarget::None { + commit_text(&text_target, &text_buf, &mut fields); + text_target = TextTarget::None; + text_buf.clear(); + } + } + Ok(XmlEvent::Eof) => break, + Err(_) => break, + _ => {} + } + + buf.clear(); + } + + fields +} + +/// Commit collected element text into the appropriate SystemFields field. +fn commit_text(target: &TextTarget, text: &str, fields: &mut SystemFields) { + let trimmed = text.trim(); + if trimmed.is_empty() { + return; + } + match target { + TextTarget::EventID => fields.event_id = trimmed.parse().unwrap_or(0), + TextTarget::Version => fields.version = trimmed.parse().ok(), + TextTarget::Level => fields.level = trimmed.parse().unwrap_or(0), + TextTarget::Task => fields.task = trimmed.parse().unwrap_or(0), + TextTarget::Opcode => fields.opcode = trimmed.parse().unwrap_or(0), + TextTarget::Keywords => fields.keywords = parse_keywords_hex(trimmed), + TextTarget::EventRecordID => fields.record_id = trimmed.parse().unwrap_or(0), + TextTarget::Channel => fields.channel = trimmed.to_string(), + TextTarget::Computer => fields.computer = trimmed.to_string(), + TextTarget::None => {} + } +} + +fn parse_keywords_hex(s: &str) -> u64 { + s.strip_prefix("0x") + .or_else(|| s.strip_prefix("0X")) + .and_then(|hex| u64::from_str_radix(hex, 16).ok()) + .or_else(|| s.parse::().ok()) + .unwrap_or(0) +} + +fn 
extract_provider_attrs(e: &quick_xml::events::BytesStart<'_>, fields: &mut SystemFields) { + for attr in e.attributes().flatten() { + match attr.key.local_name().as_ref() { + b"Name" => fields.provider_name = String::from_utf8_lossy(&attr.value).into_owned(), + b"Guid" => { + fields.provider_guid = Some(String::from_utf8_lossy(&attr.value).into_owned()) + } + _ => {} + } + } +} + +fn extract_qualifiers_attr(e: &quick_xml::events::BytesStart<'_>, fields: &mut SystemFields) { + for attr in e.attributes().flatten() { + if attr.key.local_name().as_ref() == b"Qualifiers" { + fields.qualifiers = String::from_utf8_lossy(&attr.value).parse().ok(); + } + } +} + +fn extract_time_created_attr(e: &quick_xml::events::BytesStart<'_>, fields: &mut SystemFields) { + for attr in e.attributes().flatten() { + if attr.key.local_name().as_ref() == b"SystemTime" { + fields.system_time = Some(String::from_utf8_lossy(&attr.value).into_owned()); + } + } +} + +fn extract_correlation_attrs(e: &quick_xml::events::BytesStart<'_>, fields: &mut SystemFields) { + for attr in e.attributes().flatten() { + match attr.key.local_name().as_ref() { + b"ActivityID" => { + fields.activity_id = Some(String::from_utf8_lossy(&attr.value).into_owned()) + } + b"RelatedActivityID" => { + fields.related_activity_id = Some(String::from_utf8_lossy(&attr.value).into_owned()) + } + _ => {} + } + } +} + +fn extract_execution_attrs(e: &quick_xml::events::BytesStart<'_>, fields: &mut SystemFields) { + for attr in e.attributes().flatten() { + match attr.key.local_name().as_ref() { + b"ProcessID" => { + fields.process_id = String::from_utf8_lossy(&attr.value).parse().unwrap_or(0) + } + b"ThreadID" => { + fields.thread_id = String::from_utf8_lossy(&attr.value).parse().unwrap_or(0) + } + _ => {} + } + } +} + +fn extract_security_attrs(e: &quick_xml::events::BytesStart<'_>, fields: &mut SystemFields) { + for attr in e.attributes().flatten() { + if attr.key.local_name().as_ref() == b"UserID" { + fields.user_id = 
Some(String::from_utf8_lossy(&attr.value).into_owned()); + } + } +} + +/// Build a WindowsEvent from pre-parsed SystemFields and raw XML. +/// +/// Applies event ID filters, age filters, and parses EventData/UserData. +/// Returns `Ok(None)` for filtered events. +pub fn build_event( + xml: String, + channel: &str, + config: &WindowsEventLogConfig, + rendered_message: Option, + system_fields: SystemFields, +) -> Result, WindowsEventLogError> { + let record_id = system_fields.record_id; + let event_id = system_fields.event_id; + + if record_id == 0 && event_id == 0 { + debug!( + message = "Failed to parse event XML - no valid EventID or RecordID found.", + channel = %channel + ); + return Ok(None); + } + + // Apply event ID filters early + if let Some(ref only_ids) = config.only_event_ids + && !only_ids.contains(&event_id) + { + counter!("windows_event_log_events_filtered_total", "reason" => "event_id_not_in_only_list") + .increment(1); + return Ok(None); + } + + if config.ignore_event_ids.contains(&event_id) { + counter!("windows_event_log_events_filtered_total", "reason" => "event_id_ignored") + .increment(1); + return Ok(None); + } + + // Parse timestamp + let time_created = system_fields + .system_time + .as_deref() + .and_then(|s| { + DateTime::parse_from_rfc3339(s) + .or_else(|_| DateTime::parse_from_str(s, "%Y-%m-%dT%H:%M:%S%.f%z")) + .or_else(|_| DateTime::parse_from_str(s, "%Y-%m-%dT%H:%M:%S%z")) + .ok() + }) + .map(|dt| { + let dt_utc = dt.with_timezone(&Utc); + let diff = (Utc::now() - dt_utc).num_days().abs(); + if diff > 365 * 10 { + warn!( + message = "Event timestamp is more than 10 years from current time.", + timestamp = %dt_utc, + channel = %channel, + record_id = record_id, + ); + } + dt_utc + }) + .unwrap_or_else(Utc::now); + + // Apply age filter + if let Some(max_age_secs) = config.max_event_age_secs { + let age = Utc::now().signed_duration_since(time_created); + if age.num_seconds() > max_age_secs as i64 { + 
counter!("windows_event_log_events_filtered_total", "reason" => "max_age_exceeded") + .increment(1); + return Ok(None); + } + } + + let event_data_result = extract_event_data(&xml, config); + + let event = WindowsEvent { + record_id, + event_id, + level: system_fields.level, + task: system_fields.task, + opcode: system_fields.opcode, + keywords: system_fields.keywords, + time_created, + provider_name: system_fields.provider_name, + provider_guid: system_fields.provider_guid, + channel: if system_fields.channel.is_empty() { + channel.to_string() + } else { + system_fields.channel + }, + computer: system_fields.computer, + user_id: system_fields.user_id, + process_id: system_fields.process_id, + thread_id: system_fields.thread_id, + activity_id: system_fields.activity_id, + related_activity_id: system_fields.related_activity_id, + rendered_message, + raw_xml: if config.include_xml { + let mut raw = xml; + truncate_utf8(&mut raw, 32768); + raw + } else { + String::new() + }, + event_data: event_data_result.structured_data, + user_data: event_data_result.user_data, + task_name: None, + opcode_name: None, + keyword_names: Vec::new(), + user_name: None, + version: system_fields.version, + qualifiers: system_fields.qualifiers, + string_inserts: event_data_result.string_inserts, + }; + + Ok(Some(event)) +} + +/// Convenience wrapper: parse System section + build event in one call. +#[cfg(test)] +pub fn parse_event_xml( + xml: String, + channel: &str, + config: &WindowsEventLogConfig, + rendered_message: Option, +) -> Result, WindowsEventLogError> { + let system_fields = parse_system_section(&xml); + build_event(xml, channel, config, rendered_message, system_fields) +} + +/// Extract EventData and UserData sections from event XML. 
+pub fn extract_event_data(xml: &str, config: &WindowsEventLogConfig) -> EventDataResult { + let mut structured_data = HashMap::new(); + let mut string_inserts = Vec::new(); + let mut user_data = HashMap::new(); + + parse_section(xml, "EventData", &mut structured_data, &mut string_inserts); + parse_section(xml, "UserData", &mut user_data, &mut Vec::new()); + + // Apply configurable truncation + if config.max_event_data_length > 0 { + for value in structured_data.values_mut() { + truncate_utf8(value, config.max_event_data_length); + } + for value in user_data.values_mut() { + truncate_utf8(value, config.max_event_data_length); + } + for value in string_inserts.iter_mut() { + truncate_utf8(value, config.max_event_data_length); + } + } + + EventDataResult { + structured_data, + string_inserts, + user_data, + } +} + +/// Parse a specific XML section (EventData or UserData). +fn parse_section( + xml: &str, + section_name: &str, + named_data: &mut HashMap, + inserts: &mut Vec, +) { + let mut reader = Reader::from_str(xml); + reader.trim_text(true); + + let mut buf = Vec::new(); + let mut inside_section = false; + let mut inside_data = false; + let mut current_data_name = String::new(); + let mut current_data_value = String::new(); + + const MAX_ITERATIONS: usize = 500; + const MAX_FIELDS: usize = 100; + let mut iterations = 0; + + loop { + if iterations >= MAX_ITERATIONS + || (named_data.len() >= MAX_FIELDS || inserts.len() >= MAX_FIELDS) + { + break; + } + iterations += 1; + + match reader.read_event_into(&mut buf) { + Ok(XmlEvent::Start(ref e)) => { + let name = e.name(); + if name.as_ref() == section_name.as_bytes() { + inside_section = true; + } else if inside_section && name.as_ref() == b"Data" { + inside_data = true; + current_data_name.clear(); + current_data_value.clear(); + + for attr in e.attributes().flatten() { + if attr.key.as_ref() == b"Name" { + let name_value = String::from_utf8_lossy(&attr.value); + if name_value.len() <= 128 && 
!name_value.trim().is_empty() { + current_data_name = name_value.into_owned(); + } + break; + } + } + } + } + Ok(XmlEvent::End(ref e)) => { + let name = e.name(); + if name.as_ref() == section_name.as_bytes() { + inside_section = false; + } else if name.as_ref() == b"Data" && inside_data { + inside_data = false; + + if !current_data_name.is_empty() { + named_data.insert(current_data_name.clone(), current_data_value.clone()); + } else if section_name == "EventData" && inserts.len() < MAX_FIELDS { + inserts.push(current_data_value.clone()); + } + } + } + Ok(XmlEvent::Text(ref e)) => { + if inside_section + && inside_data + && let Ok(text) = e.unescape() + { + const MAX_VALUE_SIZE: usize = 1024 * 1024; + if current_data_value.len() + text.len() <= MAX_VALUE_SIZE { + current_data_value.push_str(&text); + } + } + } + Ok(XmlEvent::Eof) => break, + Err(_) => break, + _ => {} + } + + buf.clear(); + } +} + +/// Check if bookmark XML is valid (contains an actual bookmark position). +pub fn is_valid_bookmark_xml(xml: &str) -> bool { + !xml.is_empty() && xml.contains(" Option { + let mut reader = Reader::from_str(xml); + reader.trim_text(true); + + let mut buf = Vec::new(); + let mut inside_target = false; + let mut current_element = String::new(); + + const MAX_ITERATIONS: usize = 5000; + let mut iterations = 0; + + loop { + if iterations >= MAX_ITERATIONS { + warn!(message = "XML parsing iteration limit exceeded."); + return None; + } + iterations += 1; + + match reader.read_event_into(&mut buf) { + Ok(XmlEvent::Start(ref e)) => { + let name = e.name(); + let element_name = String::from_utf8_lossy(name.as_ref()); + if element_name == tag { + inside_target = true; + current_element.clear(); + } + } + Ok(XmlEvent::Text(ref e)) => { + if inside_target { + match e.unescape() { + Ok(text) => { + if current_element.len() + text.len() > 4096 { + warn!(message = "XML element text too long, truncating."); + break; + } + current_element.push_str(&text); + } + Err(_) => return None, + 
} + } + } + Ok(XmlEvent::End(ref e)) => { + let name = e.name(); + let element_name = String::from_utf8_lossy(name.as_ref()); + if element_name == tag && inside_target { + return Some(current_element.trim().to_string()); + } + } + Ok(XmlEvent::Eof) => break, + Err(_) => return None, + _ => {} + } + + buf.clear(); + } + + None +} + +/// Extract an XML attribute value by attribute name via string search. +/// +/// Prefer `parse_system_section` for bulk System field extraction (single pass). +/// This function is retained for one-off lookups outside the hot path. +#[cfg(test)] +pub fn extract_xml_attribute(xml: &str, attr_name: &str) -> Option { + let needle = format!("{attr_name}='"); + if let Some(start) = xml.find(&needle) { + let value_start = start + needle.len(); + if let Some(end) = xml[value_start..].find('\'') { + return Some(xml[value_start..value_start + end].to_string()); + } + } + let needle = format!("{attr_name}=\""); + if let Some(start) = xml.find(&needle) { + let value_start = start + needle.len(); + if let Some(end) = xml[value_start..].find('"') { + return Some(xml[value_start..value_start + end].to_string()); + } + } + None +} + +#[cfg(test)] +mod tests { + use super::*; + + const FULL_EVENT_XML: &str = r#" + + + + 1 + 2 + 4 + 100 + 1 + 0x8000000000000000 + + 12345 + + + System + TEST-MACHINE + + + + "#; + + #[test] + fn test_parse_system_section_full() { + let fields = parse_system_section(FULL_EVENT_XML); + + assert_eq!(fields.provider_name, "Microsoft-Windows-Kernel-General"); + assert_eq!( + fields.provider_guid.as_deref(), + Some("{A68CA8B7-004F-D7B6-A698-07E2DE0F1F5D}") + ); + assert_eq!(fields.event_id, 1); + assert_eq!(fields.qualifiers, Some(16384)); + assert_eq!(fields.version, Some(2)); + assert_eq!(fields.level, 4); + assert_eq!(fields.task, 100); + assert_eq!(fields.opcode, 1); + assert_eq!(fields.keywords, 0x8000000000000000); + assert_eq!( + fields.system_time.as_deref(), + Some("2025-08-29T00:15:41.123456Z") + ); + 
assert_eq!(fields.record_id, 12345); + assert_eq!(fields.activity_id.as_deref(), Some("{AAAA-BBBB}")); + assert_eq!(fields.related_activity_id.as_deref(), Some("{CCCC-DDDD}")); + assert_eq!(fields.process_id, 1234); + assert_eq!(fields.thread_id, 5678); + assert_eq!(fields.channel, "System"); + assert_eq!(fields.computer, "TEST-MACHINE"); + assert_eq!(fields.user_id.as_deref(), Some("S-1-5-18")); + } + + #[test] + fn test_parse_system_section_minimal() { + let xml = r#" + + + + 42 + Application + PC + + + "#; + + let fields = parse_system_section(xml); + assert_eq!(fields.provider_name, "TestProvider"); + assert_eq!(fields.event_id, 42); + assert_eq!(fields.channel, "Application"); + assert_eq!(fields.computer, "PC"); + assert_eq!(fields.level, 0); + assert_eq!(fields.record_id, 0); + assert!(fields.provider_guid.is_none()); + assert!(fields.system_time.is_none()); + } + + #[test] + fn test_parse_system_section_stops_at_end_of_system() { + // Ensure parser stops after and doesn't scan EventData + let xml = r#" + + + + 1 + App + PC + + + ShouldNotBeUsed + + + "#; + + let fields = parse_system_section(xml); + assert_eq!(fields.channel, "App"); + assert_eq!(fields.provider_name, "P1"); + } + + #[test] + fn test_extract_xml_value() { + let xml = r#" + + + + 1 + 4 + 12345 + System + TEST-MACHINE + + + "#; + + assert_eq!(extract_xml_value(xml, "EventID"), Some("1".to_string())); + assert_eq!(extract_xml_value(xml, "Level"), Some("4".to_string())); + assert_eq!( + extract_xml_value(xml, "EventRecordID"), + Some("12345".to_string()) + ); + assert_eq!( + extract_xml_value(xml, "Channel"), + Some("System".to_string()) + ); + assert_eq!( + extract_xml_value(xml, "Computer"), + Some("TEST-MACHINE".to_string()) + ); + assert_eq!(extract_xml_value(xml, "NonExistent"), None); + } + + #[test] + fn test_extract_xml_attribute() { + let xml = r#" + + + + + + + "#; + + assert_eq!( + extract_xml_attribute(xml, "Name"), + Some("Microsoft-Windows-Kernel-General".to_string()) + ); + 
assert_eq!( + extract_xml_attribute(xml, "SystemTime"), + Some("2025-08-29T00:15:41.123456Z".to_string()) + ); + assert_eq!(extract_xml_attribute(xml, "NonExistent"), None); + } + + #[test] + fn test_windows_event_level_name() { + let event = WindowsEvent { + record_id: 1, + event_id: 1000, + level: 2, + task: 0, + opcode: 0, + keywords: 0, + time_created: Utc::now(), + provider_name: "Test".to_string(), + provider_guid: None, + channel: "Test".to_string(), + computer: "localhost".to_string(), + user_id: None, + process_id: 0, + thread_id: 0, + activity_id: None, + related_activity_id: None, + raw_xml: String::new(), + rendered_message: None, + event_data: HashMap::new(), + user_data: HashMap::new(), + task_name: None, + opcode_name: None, + keyword_names: Vec::new(), + user_name: None, + version: Some(1), + qualifiers: Some(0), + string_inserts: vec![], + }; + + assert_eq!(event.level_name(), "Error"); + } + + #[test] + fn test_level_0_maps_to_information() { + let mut event = WindowsEvent { + record_id: 1, + event_id: 4624, + level: 0, + task: 12544, + opcode: 0, + keywords: 0x0020000000000000, + time_created: Utc::now(), + provider_name: "Microsoft-Windows-Security-Auditing".to_string(), + provider_guid: None, + channel: "Security".to_string(), + computer: "localhost".to_string(), + user_id: None, + process_id: 0, + thread_id: 0, + activity_id: None, + related_activity_id: None, + raw_xml: String::new(), + rendered_message: None, + event_data: HashMap::new(), + user_data: HashMap::new(), + task_name: None, + opcode_name: None, + keyword_names: Vec::new(), + user_name: None, + version: Some(2), + qualifiers: Some(0), + string_inserts: vec![], + }; + + assert_eq!(event.level_name(), "Information"); + + event.level = 4; + assert_eq!(event.level_name(), "Information"); + } + + #[test] + fn test_security_limits() { + let large_xml = format!( + r#" + + + {} + + + "#, + "x".repeat(10000) + ); + + let result = extract_xml_value(&large_xml, "EventID"); + assert!( + 
result.is_none(), + "Security limits should reject excessively large XML content" + ); + } + + #[test] + fn test_configurable_truncation_disabled_by_default() { + let config = WindowsEventLogConfig::default(); + assert_eq!( + config.max_event_data_length, 0, + "Event data truncation should be disabled by default" + ); + } + + #[test] + fn test_event_data_truncation_when_enabled() { + let xml = r#" + + + This is a very long value that should be truncated when the limit is set + Short + + + "#; + + let mut config = WindowsEventLogConfig::default(); + config.max_event_data_length = 20; + + let result = extract_event_data(xml, &config); + + let long_value = result.structured_data.get("LongValue").unwrap(); + assert!( + long_value.ends_with("...[truncated]"), + "Long value should be truncated" + ); + assert!( + long_value.len() <= 20 + "...[truncated]".len(), + "Truncated value should respect limit" + ); + + let short_value = result.structured_data.get("ShortValue").unwrap(); + assert_eq!(short_value, "Short", "Short value should not be truncated"); + assert!( + !short_value.contains("truncated"), + "Short value should not have truncation marker" + ); + } + + #[test] + fn test_event_data_no_truncation_when_disabled() { + let xml = r#" + + + This is a very long value that should NOT be truncated when truncation is disabled by setting max_event_data_length to 0 + + + "#; + + let config = WindowsEventLogConfig::default(); + assert_eq!( + config.max_event_data_length, 0, + "Default should be no truncation" + ); + + let result = extract_event_data(xml, &config); + + let long_value = result.structured_data.get("LongValue").unwrap(); + assert!( + !long_value.ends_with("...[truncated]"), + "Value should not be truncated when limit is 0" + ); + assert!(long_value.len() > 100, "Full value should be preserved"); + assert!( + long_value.contains("disabled by setting max_event_data_length to 0"), + "Full text should be present" + ); + } + + #[test] + fn test_is_valid_bookmark_xml() 
{ + let valid = r#" + +"#; + assert!( + is_valid_bookmark_xml(valid), + "Should accept valid bookmark with RecordId" + ); + + assert!(!is_valid_bookmark_xml(""), "Should reject empty string"); + + let empty_list = ""; + assert!( + !is_valid_bookmark_xml(empty_list), + "Should reject empty BookmarkList" + ); + + let empty_list2 = ""; + assert!( + !is_valid_bookmark_xml(empty_list2), + "Should reject BookmarkList without Bookmark element" + ); + + let no_record_id = ""; + assert!( + !is_valid_bookmark_xml(no_record_id), + "Should reject Bookmark without RecordId" + ); + } + + #[test] + fn test_parse_event_xml_basic() { + let xml = r#" + + + + 1000 + 4 + 12345 + + Application + TEST-PC + + + "#; + + let config = WindowsEventLogConfig::default(); + + let result = parse_event_xml(xml.to_string(), "Application", &config, None); + + let event = result.unwrap().unwrap(); + assert_eq!(event.event_id, 1000); + assert_eq!(event.record_id, 12345); + assert_eq!(event.provider_name, "TestProvider"); + assert_eq!(event.channel, "Application"); + assert_eq!(event.computer, "TEST-PC"); + assert!( + event.rendered_message.is_none(), + "rendered_message should be None when not provided" + ); + } + + #[test] + fn test_parse_event_xml_with_rendered_message() { + let xml = r#" + + + + 1000 + 4 + 12345 + + Application + TEST-PC + + + "#; + + let config = WindowsEventLogConfig::default(); + let rendered_msg = Some("The application started successfully.".to_string()); + + let result = parse_event_xml(xml.to_string(), "Application", &config, rendered_msg); + + let event = result.unwrap().unwrap(); + assert_eq!(event.event_id, 1000); + assert_eq!( + event.rendered_message, + Some("The application started successfully.".to_string()) + ); + } + + #[test] + fn test_keywords_hex_parsing() { + assert_eq!(parse_keywords_hex("0x8000000000000000"), 0x8000000000000000); + assert_eq!(parse_keywords_hex("0X8000000000000000"), 0x8000000000000000); + assert_eq!(parse_keywords_hex("12345"), 12345); + 
assert_eq!(parse_keywords_hex("invalid"), 0); + assert_eq!(parse_keywords_hex("0x0020000000000000"), 0x0020000000000000); + } + + #[test] + fn test_max_event_age_secs_filters_old_events() { + let xml = r#" + + + + 1000 + 4 + 12345 + + Application + TEST-PC + + + "#; + + let mut config = WindowsEventLogConfig::default(); + config.max_event_age_secs = Some(3600); // 1 hour + + let result = parse_event_xml(xml.to_string(), "Application", &config, None); + assert!( + result.unwrap().is_none(), + "Old event should be filtered by max_event_age_secs" + ); + } + + #[test] + fn test_max_event_age_secs_allows_recent_events() { + // Use a timestamp very close to now + let now = Utc::now().format("%Y-%m-%dT%H:%M:%S%.6fZ").to_string(); + let xml = format!( + r#" + + + + 1000 + 4 + 12345 + + Application + TEST-PC + + + "# + ); + + let mut config = WindowsEventLogConfig::default(); + config.max_event_age_secs = Some(3600); // 1 hour + + let result = parse_event_xml(xml, "Application", &config, None); + assert!( + result.unwrap().is_some(), + "Recent event should pass max_event_age_secs filter" + ); + } +} diff --git a/src/unit_test.rs b/src/unit_test.rs index 95616b4d40740..733d1deca1b5d 100644 --- a/src/unit_test.rs +++ b/src/unit_test.rs @@ -188,14 +188,17 @@ pub async fn cmd(opts: &Opts, signal_handler: &mut signal::SignalHandler) -> exi match junit_reporter.write_reports(test_suite_elapsed) { Ok(()) => {} Err(error) => { - error!("Failed to execute tests:\n{}.", error); + error!("Failed to write test output:\n{}.", error); return exitcode::CONFIG; } } } } Err(errors) => { - error!("Failed to execute tests:\n{}.", errors.join("\n")); + #[allow(clippy::print_stderr)] + { + eprintln!("Failed to execute tests:\n{}.", errors.join("\n")); + } return exitcode::CONFIG; } } diff --git a/tests/behavior/config/secret.toml b/tests/behavior/config/secret.toml index a4066e5ca5e3c..24432c0bf0ef4 100644 --- a/tests/behavior/config/secret.toml +++ b/tests/behavior/config/secret.toml @@ -23,6 
+23,8 @@ .foobarbaz = "SECRET[exec_backend.def]" .foobarbazqux = "SECRET[file_backend.ghi]" .foobarbazquxquux = "SECRET[directory_backend.jkl]" + .nested_path_username = "SECRET[directory_backend.nested/username]" + .nested_path_password = "SECRET[directory_backend.nested/password]" ''' [[tests]] @@ -40,4 +42,6 @@ .foobarbaz == "def.retrieved" .foobarbazqux == "ghi.retrieved" .foobarbazquxquux == "jkl.retrieved" + .nested_path_username == "Gandalf" + .nested_path_password == "YouShallNotPass" ''' diff --git a/tests/behavior/transforms/remap.toml b/tests/behavior/transforms/remap.toml index c4401c9494141..e4690df7aab85 100644 --- a/tests/behavior/transforms/remap.toml +++ b/tests/behavior/transforms/remap.toml @@ -1021,9 +1021,9 @@ inputs = [] type = "remap" source = """ - .a = floor(.num) - .b = floor(.num, precision: 1) - .c = floor(.num, precision: 2) + .a = floor!(.num) + .b = floor!(.num, precision: 1) + .c = floor!(.num, precision: 2) """ [[tests]] name = "remap_function_floor" diff --git a/tests/data/secret-backends/directory-secrets/nested/password b/tests/data/secret-backends/directory-secrets/nested/password new file mode 100644 index 0000000000000..af0c367026544 --- /dev/null +++ b/tests/data/secret-backends/directory-secrets/nested/password @@ -0,0 +1 @@ +YouShallNotPass \ No newline at end of file diff --git a/tests/data/secret-backends/directory-secrets/nested/username b/tests/data/secret-backends/directory-secrets/nested/username new file mode 100644 index 0000000000000..9a1b235840c3c --- /dev/null +++ b/tests/data/secret-backends/directory-secrets/nested/username @@ -0,0 +1 @@ +Gandalf \ No newline at end of file diff --git a/tests/e2e/datadog-metrics/dogstatsd_client/Dockerfile b/tests/e2e/datadog-metrics/dogstatsd_client/Dockerfile index e8769539c8092..92d52e391c9f4 100644 --- a/tests/e2e/datadog-metrics/dogstatsd_client/Dockerfile +++ b/tests/e2e/datadog-metrics/dogstatsd_client/Dockerfile @@ -1,4 +1,4 @@ -FROM python:3.7-alpine +FROM 
python:3.7-alpine@sha256:f3d31c8677d03f0b3c724446077f229a6ce9d3ac430f5c08cd7dff00292048c3 COPY . /app WORKDIR /app diff --git a/tests/integration/cli.rs b/tests/integration/cli.rs index 860125a09ac6e..0b62a43338303 100644 --- a/tests/integration/cli.rs +++ b/tests/integration/cli.rs @@ -2,6 +2,7 @@ use std::{collections::HashSet, fs::read_dir, process::Command}; use assert_cmd::prelude::*; +use indoc::indoc; use crate::{create_directory, create_file, overwrite_file}; @@ -131,6 +132,56 @@ fn validate_ignore_healthcheck() { ); } +#[test] +fn test_command_no_escape_codes_in_output() { + // A config with an unhandled fallible VRL function call (missing `!`). + // This triggers a VRL compilation error reported through the test runner. + let config = create_file(indoc! {" + transforms: + broken: + inputs: [] + type: remap + source: .foo = to_int(.bar) + tests: + - name: broken_test + input: + insert_at: broken + type: log + log_fields: + bar: not_an_int + outputs: + - extract_from: broken + conditions: + - type: vrl + source: 'true' + "}); + + let mut cmd = Command::cargo_bin("vector").unwrap(); + // Force colors on so VRL diagnostics contain ANSI codes. Without this, + // the subprocess detects a non-TTY and disables colors, which would make + // the test pass even if error! was used instead of eprintln!. + cmd.arg("--color").arg("always").arg("test").arg(config); + + let output = cmd.output().expect("Failed to execute process"); + let stdout = String::from_utf8(output.stdout).expect("stdout isn't valid utf8"); + let stderr = String::from_utf8(output.stderr).expect("stderr isn't valid utf8"); + + // The command should fail + assert_ne!(output.status.code(), Some(0)); + + // Neither stdout nor stderr should contain literal escape code text. + // The error! macro escapes ANSI escape bytes into literal "\x1b" text, + // while eprintln! passes them through as raw bytes. 
+ assert!( + !stdout.contains(r"\x1b"), + "stdout contains literal \\x1b escape codes: {stdout}" + ); + assert!( + !stderr.contains(r"\x1b"), + "stderr contains literal \\x1b escape codes: {stderr}" + ); +} + fn validate(config: &str) -> i32 { let dir = create_directory(); diff --git a/tests/integration/dnstap/data/Dockerfile b/tests/integration/dnstap/data/Dockerfile index 2f829023d1f43..9694781b59155 100644 --- a/tests/integration/dnstap/data/Dockerfile +++ b/tests/integration/dnstap/data/Dockerfile @@ -1,4 +1,4 @@ -FROM docker.io/library/debian:trixie +FROM docker.io/library/debian:trixie@sha256:3615a749858a1cba49b408fb49c37093db813321355a9ab7c1f9f4836341e9db ENV DEBIAN_FRONTEND=noninteractive diff --git a/tests/integration/windows-event-log/config/test.yaml b/tests/integration/windows-event-log/config/test.yaml new file mode 100644 index 0000000000000..ed0555b2e6695 --- /dev/null +++ b/tests/integration/windows-event-log/config/test.yaml @@ -0,0 +1,17 @@ +features: +- windows-event-log-integration-tests + +test_filter: '::windows_event_log::integration_tests::' + +runner: + env: {} + +matrix: + default: ["default"] + +# changes to these files/paths will invoke the integration test in CI +# expressions are evaluated using https://github.com/micromatch/picomatch +paths: +- "src/sources/windows_event_log/**" +- "src/internal_events/windows_event_log.rs" +- "tests/integration/windows-event-log/**" diff --git a/vdev/Cargo.toml b/vdev/Cargo.toml index 8e540db8d0099..959362252e3cb 100644 --- a/vdev/Cargo.toml +++ b/vdev/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "vdev" -version = "0.2.0" +version = "0.3.0" edition = "2024" authors = ["Vector Contributors "] license = "MPL-2.0" @@ -31,11 +31,11 @@ itertools.workspace = true log = "0.4.29" # watch https://github.com/epage/anstyle for official interop with Clap owo-colors = { version = "4.2.3", features = ["supports-colors"] } -paste.workspace = true +pastey.workspace = true regex.workspace = true reqwest = { workspace 
= true, features = ["blocking"] } serde.workspace = true -serde_json.workspace = true +serde_json = { workspace = true, features = ["preserve_order"] } serde_yaml.workspace = true sha2 = "0.10.9" tempfile.workspace = true @@ -45,6 +45,8 @@ semver.workspace = true indoc.workspace = true git2 = { version = "0.20.4" } cfg-if.workspace = true +vector-vrl-functions = { path = "../lib/vector-vrl/functions", features = ["dnstap", "vrl-metrics"] } +vrl = { workspace = true, features = ["docs"] } [package.metadata.binstall] pkg-url = "{ repo }/releases/download/vdev-v{ version }/{ name }-{ target }-v{ version }.tgz" diff --git a/vdev/src/commands/build/mod.rs b/vdev/src/commands/build/mod.rs index 48890c8682b83..845ae928b84d9 100644 --- a/vdev/src/commands/build/mod.rs +++ b/vdev/src/commands/build/mod.rs @@ -1,6 +1,8 @@ mod licenses; mod publish_metadata; mod vector; +mod vector_vrl_docs; +mod vrl_docs; mod vrl_wasm; crate::cli_subcommands! { @@ -11,6 +13,8 @@ crate::cli_subcommands! { publish_metadata, release_cue, vector, + vrl_docs, + vector_vrl_docs, vrl_wasm, } diff --git a/vdev/src/commands/build/vector_vrl_docs.rs b/vdev/src/commands/build/vector_vrl_docs.rs new file mode 100644 index 0000000000000..7c25a23b139c3 --- /dev/null +++ b/vdev/src/commands/build/vector_vrl_docs.rs @@ -0,0 +1,47 @@ +use anyhow::Result; +use std::path::PathBuf; +use vrl::docs::{build_functions_doc, document_functions_to_dir}; + +/// Generate Vector-specific VRL function documentation as JSON files. +#[derive(clap::Args, Debug)] +#[command()] +pub struct Cli { + /// Output directory to create JSON files. 
If unspecified output is written to stdout as a JSON + /// array + #[arg(short, long)] + output: Option<PathBuf>, + + /// Whether to pretty-print or minify + #[arg(short, long, default_value_t = false)] + minify: bool, + + /// File extension for generated files + #[arg(short, long, default_value = "json")] + extension: String, +} + +impl Cli { + pub fn exec(self) -> Result<()> { + let functions = vector_vrl_functions::all_without_vrl_stdlib(); + if let Some(output) = &self.output { + document_functions_to_dir(&functions, output, &self.extension)?; + } else { + let built = build_functions_doc(&functions); + #[allow(clippy::print_stdout)] + if self.minify { + println!( + "{}", + serde_json::to_string(&built) + .expect("FunctionDoc serialization should not fail") + ); + } else { + println!( + "{}", + serde_json::to_string_pretty(&built) + .expect("FunctionDoc serialization should not fail") + ); + } + } + Ok(()) + } +} diff --git a/vdev/src/commands/build/vrl_docs.rs b/vdev/src/commands/build/vrl_docs.rs new file mode 100644 index 0000000000000..1917aa9cc6e13 --- /dev/null +++ b/vdev/src/commands/build/vrl_docs.rs @@ -0,0 +1,170 @@ +use std::{ + collections::BTreeMap, + fs, + path::{Path, PathBuf}, +}; + +use anyhow::{Context, Result, bail}; +use serde::{Deserialize, Serialize}; +use serde_json::Value; + +use crate::utils::git::sparse_checkout_docs; +use crate::utils::paths::find_repo_root; + +const VRL_REPO_URL: &str = "https://github.com/vectordotdev/vrl.git"; +const VECTOR_REPO_URL: &str = "https://github.com/vectordotdev/vector.git"; +const VRL_PACKAGE_NAME: &str = "vrl"; + +/// Generate VRL function documentation by fetching pre-built JSON docs from the VRL and Vector +/// repositories. +/// +/// VRL stdlib docs come from the VRL repo (`docs/generated/*.json`), and Vector-specific function +/// docs come from the Vector repo (`docs/generated/*.json`). Both sets are merged into a single +/// `generated.cue` output file.
+#[derive(clap::Args, Debug)] +#[command()] +pub struct Cli { + /// Output directory for the generated.cue file + #[arg(short, long)] + output_dir: PathBuf, + + /// VRL commit SHA to fetch docs from. If unspecified, read from Cargo.lock. + #[arg(long)] + vrl_sha: Option<String>, + + /// Vector commit SHA to fetch docs from. If unspecified, read docs/generated locally. + #[arg(long)] + vector_sha: Option<String>, +} + +#[derive(Serialize)] +struct FunctionDocWrapper { + remap: RemapWrapper, +} + +#[derive(Serialize)] +struct RemapWrapper { + functions: BTreeMap<String, Value>, +} + +impl Cli { + pub fn exec(self) -> Result<()> { + let repo_root = find_repo_root()?; + let temp_dir = tempfile::tempdir().context("Failed to create temp directory")?; + + // VRL stdlib docs + let vrl_sha = match self.vrl_sha { + Some(sha) => sha, + None => get_vrl_commit_sha(&repo_root)?, + }; + info!("VRL commit SHA: {vrl_sha}"); + + let vrl_clone_dir = temp_dir.path().join("vrl"); + sparse_checkout_docs(&vrl_sha, VRL_REPO_URL, &vrl_clone_dir)?; + let vrl_docs_dir = vrl_clone_dir.join("docs").join("generated"); + + let mut functions = read_function_docs(&vrl_docs_dir)?; + info!("Read {} VRL stdlib function docs", functions.len()); + + // Vector-specific docs + let vector_docs_dir = if let Some(vector_sha) = &self.vector_sha { + info!("Vector commit SHA: {vector_sha}"); + let vector_clone_dir = temp_dir.path().join("vector"); + sparse_checkout_docs(vector_sha, VECTOR_REPO_URL, &vector_clone_dir)?; + vector_clone_dir.join("docs").join("generated") + } else { + repo_root.join("docs").join("generated") + }; + + let vector_functions = read_function_docs(&vector_docs_dir)?; + info!("Read {} Vector function docs", vector_functions.len()); + functions.extend(vector_functions); + + let wrapper = FunctionDocWrapper { + remap: RemapWrapper { functions }, + }; + + fs::create_dir_all(&self.output_dir)?; + let mut json = serde_json::to_string(&wrapper)?; + json.push('\n'); + let filepath = self.output_dir.join("generated.cue"); + 
       fs::write(&filepath, json)?; + + info!("Generated: {}", filepath.display()); + Ok(()) + } +} + +/// A minimal representation of a `[[package]]` entry in `Cargo.lock`. +#[derive(Deserialize)] +struct LockPackage { + name: String, + version: String, + source: Option<String>, +} + +#[derive(Deserialize)] +struct CargoLock { + package: Vec<LockPackage>, +} + +/// Parse `Cargo.lock` to find a git ref for the `vrl` package. +/// +/// Returns the commit SHA for git-sourced dependencies, or a version tag (e.g. `v0.31.0`) for +/// registry-sourced dependencies. +fn get_vrl_commit_sha(repo_root: &Path) -> Result<String> { + let lock_path = repo_root.join("Cargo.lock"); + let lock_text = fs::read_to_string(&lock_path) + .with_context(|| format!("Failed to read {}", lock_path.display()))?; + + let lock: CargoLock = + toml::from_str(&lock_text).context("Failed to parse Cargo.lock as TOML")?; + + let pkg = lock + .package + .iter() + .find(|p| p.name == VRL_PACKAGE_NAME) + .context("Could not find VRL package in Cargo.lock")?; + + match pkg.source.as_deref() { + // Git source: "git+https://github.com/vectordotdev/vrl.git?branch=main#5316c01b..." + Some(source) if source.starts_with("git+") => source + .rsplit_once('#') + .map(|(_, sha)| sha.to_string()) + .context("Could not extract commit SHA from VRL git source string"), + // Registry source (crates.io): use the version as a tag + Some(source) if source.starts_with("registry+") => Ok(format!("v{}", pkg.version)), + Some(source) => bail!("Unrecognized VRL package source in Cargo.lock: {source}"), + None => bail!("VRL package in Cargo.lock has no source field"), + } +} + +/// Read all `*.json` files from a directory into a name->value map. +fn read_function_docs(docs_dir: &Path) -> Result<BTreeMap<String, Value>> { + let mut functions = BTreeMap::new(); + + let entries: Vec<_> = fs::read_dir(docs_dir) + .with_context(|| format!("Failed to read docs directory: {}", docs_dir.display()))? 
+ .collect::<Result<Vec<_>, _>>() + .context("Failed to iterate docs directory")?; + + for entry in entries { + let path = entry.path(); + if path.extension().is_some_and(|ext| ext == "json") { + let content = fs::read_to_string(&path) + .with_context(|| format!("Failed to read {}", path.display()))?; + let value: Value = serde_json::from_str(&content) + .with_context(|| format!("Failed to parse JSON from {}", path.display()))?; + + let name = path + .file_stem() + .and_then(|s| s.to_str()) + .context("Invalid filename")? + .to_string(); + + functions.insert(name, value); + } + } + + Ok(functions) +} diff --git a/vdev/src/commands/check/component_docs.rs b/vdev/src/commands/check/generated_docs.rs similarity index 76% rename from vdev/src/commands/check/component_docs.rs rename to vdev/src/commands/check/generated_docs.rs index 85a8ab280b1d8..62406951e3cde 100644 --- a/vdev/src/commands/check/component_docs.rs +++ b/vdev/src/commands/check/generated_docs.rs @@ -12,7 +12,10 @@ impl Cli { let dirty_component_files: Vec<String> = files .into_iter() .filter(|file| file.starts_with("website/cue/reference")) - .filter(|file| file.contains("generated/")) + .filter(|file| { + file.contains("generated/") + || file.starts_with("website/cue/reference/remap/functions/") + }) .collect(); // If it is not empty, there are out-of-sync component Cue files in the current branch. @@ -22,7 +25,7 @@ impl Cli { println!(" - {file}"); } println!( - "Run `make generate-component-docs` locally to update your branch and commit/push the changes." + "Run `make generate-docs` locally to update your branch and commit/push the changes." 
); std::process::exit(1); } diff --git a/vdev/src/commands/check/mod.rs b/vdev/src/commands/check/mod.rs index 4d61a76dbf3c2..44a2242cc35b3 100644 --- a/vdev/src/commands/check/mod.rs +++ b/vdev/src/commands/check/mod.rs @@ -1,8 +1,8 @@ -mod component_docs; mod component_features; mod deny; mod examples; mod fmt; +mod generated_docs; mod licenses; mod markdown; mod rust; @@ -10,7 +10,7 @@ mod scripts; crate::cli_subcommands! { "Check parts of the Vector code base..." - component_docs, + generated_docs, component_features, deny, docs, diff --git a/vdev/src/commands/mod.rs b/vdev/src/commands/mod.rs index d1ce23ef94231..807ed268b2fba 100644 --- a/vdev/src/commands/mod.rs +++ b/vdev/src/commands/mod.rs @@ -13,7 +13,7 @@ macro_rules! cli_commands { }; // All the identifiers are parsed out, build up the enum and impl blocks ( :: $( $mod:ident, )* :: ) => { - paste::paste! { + pastey::paste! { #[derive(clap::Subcommand, Debug)] enum Commands { $( [<$mod:camel>]($mod::Cli), )* @@ -111,7 +111,7 @@ cli_commands! { #[macro_export] macro_rules! script_wrapper { ( $mod:ident = $doc:literal => $script:literal ) => { - paste::paste! { + pastey::paste! { mod $mod { #[doc = $doc] #[derive(clap::Args, Debug)] diff --git a/vdev/src/utils/git.rs b/vdev/src/utils/git.rs index 54216c76e2da6..dbd458937ca37 100644 --- a/vdev/src/utils/git.rs +++ b/vdev/src/utils/git.rs @@ -1,6 +1,6 @@ //! Git utilities -use std::{collections::HashSet, process::Command}; +use std::{collections::HashSet, fs, path::Path, process::Command}; use anyhow::{Context, Result, anyhow, bail}; use git2::{BranchType, ErrorCode, Repository}; @@ -210,6 +210,32 @@ pub fn run_and_check_output(args: &[&str]) -> Result<String> { Command::new("git").in_repo().args(args).check_output() } +/// Sparse-checkout only `docs/generated` from a repo at the given commit. 
+pub fn sparse_checkout_docs(sha: &str, repo_url: &str, clone_dir: &Path) -> Result<()> { + fs::create_dir_all(clone_dir)?; + + let git = |args: &[&str]| -> Result<String> { + Command::new("git") + .current_dir(clone_dir) + .args(args) + .check_output() + }; + + git(&["init"])?; + git(&["remote", "add", "origin", repo_url])?; + git(&["config", "core.sparseCheckout", "true"])?; + + let sparse_file = clone_dir.join(".git").join("info").join("sparse-checkout"); + fs::create_dir_all(sparse_file.parent().unwrap())?; + fs::write(&sparse_file, "docs/generated\n") + .context("Failed to write sparse-checkout config")?; + + git(&["fetch", "--depth", "1", "origin", sha])?; + git(&["checkout", "FETCH_HEAD"])?; + + Ok(()) +} + fn is_warning_line(line: &str) -> bool { line.starts_with("warning: ") || line.contains("original line endings") } diff --git a/website/Makefile b/website/Makefile index aaf1ecd9a5bf4..bb6f296de6ae1 100644 --- a/website/Makefile +++ b/website/Makefile @@ -1,5 +1,7 @@ CUE = ../scripts/cue.sh +export CI ?= false + # Override to specify which network address to use in Hugo/HTTP servers for binding export SERVER_BIND ?= 127.0.0.1 # Override to specify which port to use in Hugo/HTTP servers for listening @@ -22,7 +24,13 @@ cue-%: config-examples: yarn config-examples -structured-data: cue-build config-examples +VDEV ?= cargo vdev + +generate-vrl-docs: + $(VDEV) build vrl-docs --output-dir cue/reference/remap/functions \ + $(if $(findstring true,$(CI)),--vector-sha $(GITHUB_SHA),) + +structured-data: generate-vrl-docs cue-build config-examples serve: clean setup cargo-data structured-data hugo server \ diff --git a/website/README.md b/website/README.md index 7343d56090fd9..214ed0d4007e2 100644 --- a/website/README.md +++ b/website/README.md @@ -6,7 +6,8 @@ This directory houses all the assets used to build Vector's website and document In order to run the site [locally](#run-the-site-locally), you need to have these installed: -* The [Hugo] static site generator. 
Refer to https://gohugo.io/installation/ for instructions. +* The [Hugo] static site generator (version 0.154.5 to match CI). + * Download the **extended** version for your platform from https://github.com/gohugoio/hugo/releases/tag/v0.154.5 * The CLI tool for the [CUE] configuration and validation language. * [Node.js] and the [Yarn] package manager (for static assets and some scripting). * [htmltest] for link checking. diff --git a/website/content/en/blog/log-namespacing.md b/website/content/en/blog/log-namespacing.md index 52bcb80bafadc..d867f21c9411d 100644 --- a/website/content/en/blog/log-namespacing.md +++ b/website/content/en/blog/log-namespacing.md @@ -167,6 +167,6 @@ sure a meaning exists for all required fields. If a source does not provide a re a meaning needs to be manually adjusted for any reason, the VRL function [set_semantic_meaning] can be used. -[global log schema]: /docs/reference/configuration/global-options/#log_schema +[global log schema]: /docs/reference/configuration/schema/#log_schema [set_semantic_meaning]: /docs/reference/vrl/functions/#set_semantic_meaning [remap]: /docs/reference/configuration/transforms/remap/ diff --git a/website/content/en/docs/reference/api.md b/website/content/en/docs/reference/api.md index 705c598ab9af7..a28724feeb0a7 100644 --- a/website/content/en/docs/reference/api.md +++ b/website/content/en/docs/reference/api.md @@ -9,7 +9,7 @@ Vector ships with a [GraphQL] API that allows you to interact with a running Vec ## Configuration -{{< api/config >}} +{{< config/group group="api" >}} ## Endpoints diff --git a/website/content/en/docs/reference/configuration/api.md b/website/content/en/docs/reference/configuration/api.md new file mode 100644 index 0000000000000..e3ba6a0372ff7 --- /dev/null +++ b/website/content/en/docs/reference/configuration/api.md @@ -0,0 +1,14 @@ +--- +title: API configuration reference +short: API +weight: 6 +show_toc: true +--- + +This page documents the configuration for Vector's observability 
API. + +The API enables you to query Vector's topology, metrics, and health information through a GraphQL endpoint, as well as access an interactive GraphQL playground for development. + +{{< config-cross-links group="api" >}} + +{{< config/group group="api" >}} diff --git a/website/content/en/docs/reference/configuration/global-options.md b/website/content/en/docs/reference/configuration/global-options.md index d3c2059f3c88b..6e2c296a476ce 100644 --- a/website/content/en/docs/reference/configuration/global-options.md +++ b/website/content/en/docs/reference/configuration/global-options.md @@ -3,8 +3,11 @@ title: Global options reference short: Global options weight: 4 aliases: ["/docs/reference/global-options"] +show_toc: true --- -## Global configuration parameters +This page documents global configuration options that apply to Vector as a whole, such as data directories, timezone settings, logging configuration, and more. -{{< config/global >}} +{{< config-cross-links group="global_options" >}} + +{{< config/group group="global_options" >}} diff --git a/website/content/en/docs/reference/configuration/pipeline-components.md b/website/content/en/docs/reference/configuration/pipeline-components.md new file mode 100644 index 0000000000000..c58bd681416e9 --- /dev/null +++ b/website/content/en/docs/reference/configuration/pipeline-components.md @@ -0,0 +1,14 @@ +--- +title: Pipeline components reference +short: Pipeline components +weight: 5 +show_toc: true +--- + +This page documents the top-level configuration for pipeline components: sources, transforms, sinks, and enrichment tables. + +These fields define the structure of your observability data pipeline. Each component is defined as a table within these sections, with component-specific configuration options. 
+ +{{< config-cross-links group="pipeline_components" >}} + +{{< config/group group="pipeline_components" >}} diff --git a/website/content/en/docs/reference/configuration/schema.md b/website/content/en/docs/reference/configuration/schema.md new file mode 100644 index 0000000000000..94f380ed1a19b --- /dev/null +++ b/website/content/en/docs/reference/configuration/schema.md @@ -0,0 +1,12 @@ +--- +title: Schema configuration reference +short: Schema +weight: 7 +show_toc: true +--- + +This page documents the configuration for Vector's internal schema system. + +{{< config-cross-links group="schema" >}} + +{{< config/group group="schema" >}} diff --git a/website/content/en/docs/reference/configuration/secrets.md b/website/content/en/docs/reference/configuration/secrets.md new file mode 100644 index 0000000000000..5137831498929 --- /dev/null +++ b/website/content/en/docs/reference/configuration/secrets.md @@ -0,0 +1,14 @@ +--- +title: Secrets configuration reference +short: Secrets +weight: 8 +show_toc: true +--- + +This page documents the configuration for Vector's secrets management. + +Secrets allow you to securely store and reference sensitive configuration values like API keys, passwords, and tokens without exposing them in plaintext configuration files. 
+ +{{< config-cross-links group="secrets" >}} + +{{< config/group group="secrets" >}} diff --git a/website/content/en/docs/reference/configuration/sources/windows_event_log.md b/website/content/en/docs/reference/configuration/sources/windows_event_log.md new file mode 100644 index 0000000000000..56e08ac25efb2 --- /dev/null +++ b/website/content/en/docs/reference/configuration/sources/windows_event_log.md @@ -0,0 +1,14 @@ +--- +title: Windows Event Log +description: Collect logs from [Windows Event Log](https://learn.microsoft.com/en-us/windows/win32/wes/windows-event-log) channels using the native Windows Event Log API +component_kind: source +layout: component +tags: ["windows_event_log", "component", "source", "logs"] +--- + +{{/* +This doc is generated using: + +1. The template in layouts/docs/component.html +2. The relevant CUE data in cue/reference/components/... +*/}} diff --git a/website/content/en/docs/reference/configuration/template-syntax.md b/website/content/en/docs/reference/configuration/template-syntax.md index 372cd7988b4bf..43822e137c555 100644 --- a/website/content/en/docs/reference/configuration/template-syntax.md +++ b/website/content/en/docs/reference/configuration/template-syntax.md @@ -59,7 +59,7 @@ option = "year=%Y/month=%m/day=%d/" {{< info >}} The value is derived from the [`timestamp` field](/docs/architecture/data-model/log/#timestamps) -and the name of this field can be changed via the [global `timestamp_key` option](/docs/reference/configuration/global-options/#log_schema.timestamp_key). +and the name of this field can be changed via the [global `timestamp_key` option](/docs/reference/configuration/schema/#log_schema.timestamp_key). 
{{< /info >}} ### Escaping diff --git a/website/content/en/guides/aws/aws-secrets-manager.md b/website/content/en/guides/aws/aws-secrets-manager.md index ac02a5f9fc6ab..51971e64d3156 100644 --- a/website/content/en/guides/aws/aws-secrets-manager.md +++ b/website/content/en/guides/aws/aws-secrets-manager.md @@ -255,5 +255,5 @@ vector --config vector.yaml 2>&1 | grep -i secret ## Related resources - [AWS Secrets Manager Documentation](https://docs.aws.amazon.com/secretsmanager/) -- [Vector Secrets Management Overview](/docs/reference/configuration/global-options/#secret) -- [AWS Authentication in Vector](/docs/reference/configuration/components/aws/) +- [Vector Secrets Management Overview](/docs/reference/configuration/secrets/#secret) +- [AWS Authentication in Vector](/docs/reference/configuration/sinks/aws_s3/#auth) diff --git a/website/content/en/guides/level-up/log_namespace.md b/website/content/en/guides/level-up/log_namespace.md index 14ae5fc02230e..0782d91aac23c 100644 --- a/website/content/en/guides/level-up/log_namespace.md +++ b/website/content/en/guides/level-up/log_namespace.md @@ -16,10 +16,28 @@ Before you begin, this guide assumes the following: * When log namespacing is enabled, the [global schema settings] are ignored. * This feature is still in `beta` so behavior might change. -[global schema settings]: /docs/reference/configuration/global-options/#log_schema +[global schema settings]: /docs/reference/configuration/schema/#log_schema [docs.setup.quickstart]: /docs/setup/quickstart/ + +If you encounter any issues please [report them here](https://github.com/vectordotdev/vector/issues/new?template=bug.yml). + {{< /requirement >}} +## Background + +Vector traditionally stored metadata (like `host`, `timestamp`, and `source_type`) as top-level +fields alongside your log data. 
This "legacy" approach has a few drawbacks: + +* **Field name collisions**: If your logs contain a field named `host`, it could conflict with + Vector's metadata field +* **Unclear ownership**: It's not immediately obvious which fields are from your data and which + are Vector metadata +* **Difficult transformations**: When you want to transform only your data (not metadata), you + need to be careful to exclude metadata fields + +The Vector namespace mode solves these issues by storing metadata in a separate namespace, +completely isolated from your log data. + ## Default Behavior ### Vector Config @@ -190,3 +208,58 @@ Sample output from `json_console`: ```json "bar" ``` + +## Migration Considerations + +If you're considering migrating from legacy mode (`log_namespace = false`) to Vector namespace mode +(`log_namespace = true`), here are key things to be aware of: + +### VRL Updates + +VRL scripts that reference metadata fields will need to be updated to use the metadata accessor syntax: + +**Legacy mode:** + +```coffee +.host = "new-host" +.timestamp = now() +``` + +**Vector namespace mode:** + +```coffee +%vector.host = "new-host" +%vector.ingest_timestamp = now() +``` + +### Sink Behavior Differences + +Many sinks will behave differently depending on the namespace setting. Always test your sinks after switching modes to verify expected +behavior before deploying. + +### Gradual Migration Strategy + +You can configure `log_namespace` per-source if you need a gradual migration: + +```yaml +# Global default (legacy) +schema: + log_namespace: false + +sources: + # New source using Vector namespace + new_source: + type: http_server + log_namespace: true + + # Existing source still using legacy + existing_source: + type: file + # Uses global default (false) +``` + +This allows you to: + +1. Keep existing pipelines working with legacy mode +2. Adopt Vector namespace mode for selected sources only +3. 
Migrate sources incrementally over time diff --git a/website/content/en/highlights/2020-01-14-rename-line-field-to-message.md b/website/content/en/highlights/2020-01-14-rename-line-field-to-message.md index 67ab35f529498..163a145484f43 100644 --- a/website/content/en/highlights/2020-01-14-rename-line-field-to-message.md +++ b/website/content/en/highlights/2020-01-14-rename-line-field-to-message.md @@ -21,4 +21,4 @@ options][docs.reference.configuration.global-options#log_schema]. There are no changes you need to make. Just be aware that your events will no longer have a `line` field. -[docs.reference.configuration.global-options#log_schema]: /docs/reference/configuration/global-options/#log_schema +[docs.reference.configuration.global-options#log_schema]: /docs/reference/configuration/schema/#log_schema diff --git a/website/content/en/highlights/2020-02-14-global-log-schema.md b/website/content/en/highlights/2020-02-14-global-log-schema.md index c12851e15b10b..6949d0d409fae 100644 --- a/website/content/en/highlights/2020-02-14-global-log-schema.md +++ b/website/content/en/highlights/2020-02-14-global-log-schema.md @@ -41,9 +41,9 @@ Why is this useful? to the downstream service's "host" field. 
[docs.data-model.log]: /docs/architecture/data-model/log -[docs.global-options#host_key]: /docs/reference/configuration/global-options/#log_schema.host_key -[docs.global-options#log_schema]: /docs/reference/configuration/global-options/#log_schema -[docs.global-options#message_key]: /docs/reference/configuration/global-options/#log_schema.message_key +[docs.global-options#host_key]: /docs/reference/configuration/schema/#log_schema.host_key +[docs.global-options#log_schema]: /docs/reference/configuration/schema/#log_schema +[docs.global-options#message_key]: /docs/reference/configuration/schema/#log_schema.message_key [docs.sinks]: /docs/reference/configuration/sinks/ [docs.sources]: /docs/reference/configuration/sources/ [docs.transforms.remap]: /docs/reference/configuration/transforms/remap diff --git a/website/content/en/highlights/2020-10-28-new-aws-integrations.md b/website/content/en/highlights/2020-10-28-new-aws-integrations.md index 117f4ff31d019..e41ea8e563a75 100644 --- a/website/content/en/highlights/2020-10-28-new-aws-integrations.md +++ b/website/content/en/highlights/2020-10-28-new-aws-integrations.md @@ -37,6 +37,6 @@ and let us know. [aws_s3_source]: /docs/reference/configuration/sources/aws_s3/ [aws_sqs_sink]: /docs/reference/configuration/sinks/aws_sqs/ [chat]: https://chat.vector.dev -[cloudwatch_guide]: /guides/advanced/cloudwatch-logs-firehose/ +[cloudwatch_guide]: /guides/aws/cloudwatch-logs-firehose/ [rube_goldberg]: https://en.wikipedia.org/wiki/Rube_Goldberg_machine [sinks]: /docs/reference/configuration/sinks/ diff --git a/website/content/en/highlights/2021-11-18-failed-event-routing.md b/website/content/en/highlights/2021-11-18-failed-event-routing.md index 155d97d4322db..76811d7b80c33 100644 --- a/website/content/en/highlights/2021-11-18-failed-event-routing.md +++ b/website/content/en/highlights/2021-11-18-failed-event-routing.md @@ -128,6 +128,6 @@ transforms like `filter`. 
In the meantime, if you any feedback for us, let us know on our [Discord chat] or on [Twitter]! [remap docs page]: /docs/reference/configuration/transforms/remap/ -[log_schema.metadata_key]: /docs/reference/configuration/global-options/#log_schema.metadata_key +[log_schema.metadata_key]: /docs/reference/configuration/schema/#log_schema.metadata_key [Discord chat]: https://discord.com/invite/dX3bdkF [Twitter]: https://twitter.com/vectordotdev diff --git a/website/content/en/highlights/2021-11-18-implicit-namespacing.md b/website/content/en/highlights/2021-11-18-implicit-namespacing.md index 3f868958f05f8..087a2e5b06575 100644 --- a/website/content/en/highlights/2021-11-18-implicit-namespacing.md +++ b/website/content/en/highlights/2021-11-18-implicit-namespacing.md @@ -86,7 +86,7 @@ This can similarly be applied to the newly added [`enrichment_tables`][enrichmen If you any feedback for us, let us know on our [Discord chat] or on [Twitter]. -[enrichment_tables]: /docs/reference/configuration/global-options/#enrichment_tables +[enrichment_tables]: /docs/reference/configuration/pipeline-components/#enrichment_tables [tests]: /docs/reference/configuration/unit-tests [Discord chat]: https://discord.com/invite/dX3bdkF [Twitter]: https://twitter.com/vectordotdev diff --git a/website/content/en/highlights/2022-07-07-secrets-management.md b/website/content/en/highlights/2022-07-07-secrets-management.md index 97f482576fe03..5f69e3783df04 100644 --- a/website/content/en/highlights/2022-07-07-secrets-management.md +++ b/website/content/en/highlights/2022-07-07-secrets-management.md @@ -59,4 +59,4 @@ Vector will then use the returned values when loading the configuration. If an `error` is returned, or the command exits non-zero, Vector will log any errors and stop. -See the [documentation](/docs/reference/configuration/global-options/#secret) for additional details. +See the [documentation](/docs/reference/configuration/secrets/#secret) for additional details. 
diff --git a/website/content/en/highlights/2022-08-16-0-24-0-upgrade-guide.md b/website/content/en/highlights/2022-08-16-0-24-0-upgrade-guide.md index 7a18c94e1fe8b..360cabd84f9c5 100644 --- a/website/content/en/highlights/2022-08-16-0-24-0-upgrade-guide.md +++ b/website/content/en/highlights/2022-08-16-0-24-0-upgrade-guide.md @@ -236,7 +236,7 @@ Sinks: #### Deprecation of the `geoip` transform {#geoip-deprecation} The `geoip` transform has been deprecated in-lieu of new support for [`geoip` enrichment -tables](/docs/reference/configuration/global-options/#enrichment_tables.type). These can be used +tables](/docs/reference/configuration/pipeline-components/#enrichment_tables..type). These can be used with [VRL's enrichment table functions](/docs/reference/vrl/functions/#enrichment-functions) to enrich events using a [GeoIP database](https://www.maxmind.com/en/geoip2-databases). diff --git a/website/content/en/highlights/2022-11-07-0-26-0-upgrade-guide.md b/website/content/en/highlights/2022-11-07-0-26-0-upgrade-guide.md index cc64dcdf3762f..26235cfa5498b 100644 --- a/website/content/en/highlights/2022-11-07-0-26-0-upgrade-guide.md +++ b/website/content/en/highlights/2022-11-07-0-26-0-upgrade-guide.md @@ -63,7 +63,7 @@ transforms: The `geoip` transform was deprecated in `0.24.0` and has now been removed. You can get the same functionality through a `remap` transform with [`geoip` enrichment -tables](/docs/reference/configuration/global-options/#enrichment_tables.type). These can be used +tables](/docs/reference/configuration/pipeline-components/#enrichment_tables..type). These can be used with [VRL's enrichment table functions](/docs/reference/vrl/functions/#enrichment-functions) to enrich events using a [GeoIP database](https://www.maxmind.com/en/geoip2-databases). 
diff --git a/website/content/en/releases/0.54.0.md b/website/content/en/releases/0.54.0.md new file mode 100644 index 0000000000000..f52a589288c84 --- /dev/null +++ b/website/content/en/releases/0.54.0.md @@ -0,0 +1,4 @@ +--- +title: Vector v0.54.0 release notes +weight: 33 +--- diff --git a/website/cue/reference/administration/interfaces/kubectl.cue b/website/cue/reference/administration/interfaces/kubectl.cue index cdd91460bf05f..32258dfb538b9 100644 --- a/website/cue/reference/administration/interfaces/kubectl.cue +++ b/website/cue/reference/administration/interfaces/kubectl.cue @@ -19,7 +19,7 @@ administration: interfaces: kubectl: { role_implementations: [Name=string]: { commands: { _deployment_variant: string - _vector_version: "0.53" + _vector_version: "0.54" _namespace: string | *"vector" _controller_resource_type: string _controller_resource_name: string | *_deployment_variant diff --git a/website/cue/reference/api.cue b/website/cue/reference/api.cue index 720b5ef84334f..7575915c4be26 100644 --- a/website/cue/reference/api.cue +++ b/website/cue/reference/api.cue @@ -17,7 +17,7 @@ api: { Vector in real-time. """ schema_json_url: "https://github.com/vectordotdev/vector/blob/master/lib/vector-api-client/graphql/schema.json" - configuration: generated.api.configuration.api + configuration: generated.configuration.configuration.api.type.object.options endpoints: { "/graphql": { diff --git a/website/cue/reference/components/sources/generated/opentelemetry.cue b/website/cue/reference/components/sources/generated/opentelemetry.cue index 4791cc458c8cf..d3b498506a323 100644 --- a/website/cue/reference/components/sources/generated/opentelemetry.cue +++ b/website/cue/reference/components/sources/generated/opentelemetry.cue @@ -327,13 +327,67 @@ generated: components: sources: opentelemetry: configuration: { } use_otlp_decoding: { description: """ - Setting this field will override the legacy mapping of OTEL protos to Vector events and use the proto directly. 
- - One major caveat here is that the incoming metrics will be parsed as logs but they will preserve the OTLP format. - This means that components that work on metrics, will not be compatible with this output. - However, these events can be forwarded directly to a downstream OTEL collector. + Configuration for OTLP decoding behavior. + + This configuration controls how OpenTelemetry Protocol (OTLP) data is decoded for each + signal type (logs, metrics, traces). When a signal is configured to use OTLP decoding, the raw OTLP format is + preserved, allowing the data to be forwarded to downstream OTLP collectors without transformation. + Otherwise, the signal is converted to Vector's native event format. + + Simple boolean form: + + ```yaml + use_otlp_decoding: true # All signals preserve OTLP format + # or + use_otlp_decoding: false # All signals use Vector native format (default) + ``` + + Per-signal configuration: + + ```yaml + use_otlp_decoding: + logs: false # Convert to Vector native format + metrics: false # Convert to Vector native format + traces: true # Preserve OTLP format + ``` + + **Note:** When OTLP decoding is enabled for metrics: + - Metrics are parsed as logs while preserving the OTLP format + - Vector's metric transforms will NOT be compatible with this output + - The events can be forwarded directly (passthrough) to a downstream OTLP collector """ required: false - type: bool: default: false + type: object: options: { + logs: { + description: """ + Whether to use OTLP decoding for logs. + + When `true`, logs preserve their OTLP format. + When `false` (default), logs are converted to Vector's native format. + """ + required: false + type: bool: default: false + } + metrics: { + description: """ + Whether to use OTLP decoding for metrics. + + When `true`, metrics preserve their OTLP format but are processed as logs. + When `false` (default), metrics are converted to Vector's native metric format. 
+ """ + required: false + type: bool: default: false + } + traces: { + description: """ + Whether to use OTLP decoding for traces. + + When `true`, traces preserve their OTLP format. + When `false` (default), traces are converted to Vector's native format. + """ + required: false + type: bool: default: false + } + } } } diff --git a/website/cue/reference/components/sources/generated/windows_event_log.cue b/website/cue/reference/components/sources/generated/windows_event_log.cue new file mode 100644 index 0000000000000..491b37359bd69 --- /dev/null +++ b/website/cue/reference/components/sources/generated/windows_event_log.cue @@ -0,0 +1,283 @@ +package metadata + +generated: components: sources: windows_event_log: configuration: { + acknowledgements: { + deprecated: true + description: """ + Controls how acknowledgements are handled for this source. + + When enabled, the source will wait for downstream sinks to acknowledge + receipt of events before updating checkpoints. This provides at-least-once + delivery guarantees; duplicate events may be re-emitted on restart + if acknowledgements are still pending. + + When disabled (default), checkpoints are updated immediately after reading + events, which may result in data loss if Vector crashes before events are + delivered to sinks. + """ + required: false + type: object: options: enabled: { + description: "Whether or not end-to-end acknowledgements are enabled for this source." + required: false + type: bool: {} + } + } + batch_size: { + description: """ + Batch size for event processing. + + This controls how many events are processed in a single batch. + """ + required: false + type: uint: { + default: 100 + examples: [10, 100] + } + } + channels: { + description: """ + A comma-separated list of channels to read from. + + Common channels include "System", "Application", "Security", "Windows PowerShell". + Use Windows Event Viewer to discover available channels.
+ """ + required: true + type: array: items: type: string: examples: ["System,Application,Security", "System"] + } + checkpoint_interval_secs: { + description: """ + Interval in seconds between periodic checkpoint flushes. + + Controls how often bookmarks are persisted to disk in synchronous mode. + Lower values reduce the window of events that may be re-processed after + a crash, at the cost of more frequent disk writes. + """ + required: false + type: uint: { + default: 5 + examples: [5, 1, 30] + } + } + connection_timeout_secs: { + description: """ + Connection timeout in seconds for event subscription. + + This controls how long to wait for event subscription connection. + """ + required: false + type: uint: { + default: 30 + examples: [30, 60] + } + } + data_dir: { + description: """ + The directory where checkpoint data is stored. + + By default, the [global `data_dir` option][global_data_dir] is used. + Make sure the running user has write permissions to this directory. + + [global_data_dir]: https://vector.dev/docs/reference/configuration/global-options/#data_dir + """ + required: false + type: string: examples: ["/var/lib/vector", "C:\\ProgramData\\vector"] + } + event_data_format: { + description: """ + Custom event data formatting options. + + Maps event field names to custom formatting options. + """ + required: false + type: object: options: "*": { + description: "An individual event data format override." + required: true + type: string: enum: { + auto: """ + Keep the original format unchanged (passthrough). + The field value will not be converted or modified. + """ + boolean: """ + Parse and format the field value as a boolean. + Recognizes "true", "1", "yes", "on" as true (case-insensitive). + """ + float: "Parse and format the field value as a floating-point number." + integer: "Parse and format the field value as an integer." + string: "Format the field value as a string." 
+ } + } + } + event_query: { + description: """ + The XPath query for filtering events. + + Allows filtering events using XML Path Language queries. + If not specified, all events from the specified channels will be collected. + """ + required: false + type: string: examples: ["*[System[Level=1 or Level=2 or Level=3]]", "*[System[(Level=1 or Level=2 or Level=3) and TimeCreated[timediff(@SystemTime) <= 86400000]]]"] + } + event_timeout_ms: { + description: """ + Timeout in milliseconds for waiting for new events. + + Controls the maximum time `WaitForMultipleObjects` blocks before + returning to check for shutdown signals. Lower values increase + shutdown responsiveness at the cost of more frequent wake-ups. + """ + required: false + type: uint: { + default: 5000 + examples: [5000, 10000] + } + } + events_per_second: { + description: """ + Maximum number of events to process per second. + + When set to a non-zero value, Vector will rate-limit event processing + to prevent overwhelming downstream systems. A value of 0 (default) means + no rate limiting is applied. + """ + required: false + type: uint: { + default: 0 + examples: [100, 1000, 5000] + } + } + field_filter: { + description: """ + Event field inclusion/exclusion patterns. + + Controls which event fields are included in the output. + """ + required: false + type: object: options: { + exclude_fields: { + description: """ + Fields to exclude from the output. + + These fields will be removed from the event data. + """ + required: false + type: array: items: type: string: {} + } + include_event_data: { + description: """ + Whether to include event data fields. + + Event data fields contain application-specific data. + """ + required: false + type: bool: default: true + } + include_fields: { + description: """ + Fields to include in the output. + + If specified, only these fields will be included. 
+ """ + required: false + type: array: items: type: string: {} + } + include_system_fields: { + description: """ + Whether to include system fields. + + System fields include metadata like Computer, TimeCreated, etc. + """ + required: false + type: bool: default: true + } + include_user_data: { + description: """ + Whether to include user data fields. + + User data fields contain additional custom data. + """ + required: false + type: bool: default: true + } + } + } + ignore_event_ids: { + description: """ + Ignore specific event IDs. + + Events with these IDs will be filtered out and not sent downstream. + """ + required: false + type: array: { + default: [] + items: type: uint: examples: [4624, 4625, 4634] + } + } + include_xml: { + description: """ + Whether to include raw XML data in the output. + + When enabled, the raw XML representation of the event is included + in the `xml` field of the output event. + """ + required: false + type: bool: default: false + } + max_event_age_secs: { + description: """ + Maximum age of events to process (in seconds). + + Events older than this value will be ignored. If not specified, + all events will be processed regardless of age. + """ + required: false + type: uint: examples: [86400, 604800] + } + max_event_data_length: { + description: """ + Maximum length for event data field values. + + Event data values longer than this will be truncated with "...\\[truncated\\]" appended. + Set to 0 for no limit. + """ + required: false + type: uint: { + default: 0 + examples: [1024, 4096] + } + } + only_event_ids: { + description: """ + Only include specific event IDs. + + If specified, only events with these IDs will be processed. + Takes precedence over `ignore_event_ids`. + """ + required: false + type: array: items: type: uint: examples: [1000, 1001, 1002] + } + read_existing_events: { + description: """ + Whether to read existing events or only new events. 
+ + When set to `true`, the source will read all existing events from the channels. + When set to `false` (default), only new events will be read. + """ + required: false + type: bool: default: false + } + render_message: { + description: """ + Whether to render human-readable event messages. + + When enabled (default), Vector will use the Windows EvtFormatMessage API + to render localized, human-readable event messages with parameter + substitution. This matches the behavior of Windows Event Viewer. + + Provider DLL handles are cached per provider, so the performance cost + is limited to the first event from each provider. Disable only if you + do not need rendered messages and want to eliminate the DLL loads entirely. + """ + required: false + type: bool: default: true + } +} diff --git a/website/cue/reference/components/sources/windows_event_log.cue b/website/cue/reference/components/sources/windows_event_log.cue new file mode 100644 index 0000000000000..0bf1c424daea6 --- /dev/null +++ b/website/cue/reference/components/sources/windows_event_log.cue @@ -0,0 +1,113 @@ +package metadata + +components: sources: windows_event_log: { + title: "Windows Event Log" + + description: """ + Collects log events from Windows Event Log channels using the native + Windows Event Log API. + """ + + classes: { + commonly_used: false + delivery: "at_least_once" + deployment_roles: ["daemon"] + development: "beta" + egress_method: "stream" + stateful: true + } + + features: { + auto_generated: true + acknowledgements: true + collect: { + checkpoint: enabled: true + from: service: { + name: "Windows Event Log" + thing: "Windows Event Log channels" + url: "https://learn.microsoft.com/en-us/windows/win32/wes/windows-event-log" + versions: null + } + } + multiline: enabled: false + } + + support: { + requirements: [ + """ + This source is only supported on Windows. Attempting to use it on + other operating systems will result in an error at startup. 
+ """, + ] + warnings: [] + notices: [] + } + + installation: { + platform_name: null + } + + configuration: generated.components.sources.windows_event_log.configuration + + output: { + logs: event: { + description: "An individual Windows Event Log event." + fields: { + source_type: { + description: "The name of the source type." + required: true + type: string: { + examples: ["windows_event_log"] + } + } + timestamp: { + description: "The timestamp of the event." + required: false + type: timestamp: {} + } + message: { + description: "The rendered event message." + required: false + type: string: { + examples: ["The service was started successfully."] + } + } + channel: { + description: "The event log channel name." + required: false + type: string: { + examples: ["System", "Application", "Security"] + } + } + event_id: { + description: "The event identifier." + required: false + type: uint: { + examples: [7036, 4624, 1000] + } + } + provider_name: { + description: "The name of the event provider." + required: false + type: string: { + examples: ["Microsoft-Windows-Security-Auditing"] + } + } + computer: { + description: "The name of the computer that generated the event." + required: false + type: string: { + examples: ["DESKTOP-ABC123"] + } + } + level: { + description: "The event severity level." 
+ required: false + type: string: { + examples: ["Information", "Warning", "Error", "Critical"] + } + } + } + } + } +} diff --git a/website/cue/reference/configuration.cue b/website/cue/reference/configuration.cue index 2321cea179a15..38fb761216d4a 100644 --- a/website/cue/reference/configuration.cue +++ b/website/cue/reference/configuration.cue @@ -6,11 +6,13 @@ configuration: { outputs: [components.#Output, ...components.#Output] } } + groups?: _ how_it_works: #HowItWorks } configuration: { configuration: generated.configuration.configuration + groups: generated.configuration.groups configuration: { // expire_metrics's type is a little bit tricky, we could not generate `uint` from `docs::type_override` metadata macro easily. @@ -25,6 +27,7 @@ configuration: { """ required: false warnings: ["Deprecated, please use `expire_metrics_secs` instead."] + group: "global_options" type: object: options: { secs: { common: true diff --git a/website/cue/reference/generated/api.cue b/website/cue/reference/generated/api.cue deleted file mode 100644 index 298b9fb59e686..0000000000000 --- a/website/cue/reference/generated/api.cue +++ /dev/null @@ -1,44 +0,0 @@ -package metadata - -generated: api: configuration: api: { - address: { - common: true - description: """ - The network address to which the API should bind. If you're running - Vector in a Docker container, bind to `0.0.0.0`. Otherwise - the API will not be exposed outside the container. - """ - required: false - type: string: { - default: "127.0.0.1:8686" - examples: ["0.0.0.0:8686", "127.0.0.1:1234"] - } - } - enabled: { - common: true - description: "Whether the GraphQL API is enabled for this Vector instance." - required: false - type: bool: default: false - } - graphql: { - common: true - description: """ - Whether the endpoint for receiving and processing GraphQL queries is - enabled for the API. The endpoint is accessible via the `/graphql` - endpoint of the address set using the `bind` parameter. 
- """ - required: false - type: bool: default: true - } - playground: { - common: false - description: """ - Whether the [GraphQL Playground](https://github.com/graphql/graphql-playground) is enabled - for the API. The Playground is accessible via the `/playground` endpoint - of the address set using the `bind` parameter. Note that the `playground` - endpoint will only be enabled if the `graphql` endpoint is also enabled. - """ - required: false - type: bool: default: true - } -} diff --git a/website/cue/reference/generated/configuration.cue b/website/cue/reference/generated/configuration.cue index 9a5173d3d0769..0dbca1b677111 100644 --- a/website/cue/reference/generated/configuration.cue +++ b/website/cue/reference/generated/configuration.cue @@ -1,1045 +1,1548 @@ package metadata -generated: configuration: configuration: { - healthchecks: { - type: object: options: { - enabled: { - type: bool: default: true - description: """ - Whether or not healthchecks are enabled for all sinks. - - Can be overridden on a per-sink basis. - """ - required: false - } - require_healthy: { - type: bool: default: false - description: """ - Whether or not to require a sink to report as being healthy during startup. - - When enabled and a sink reports not being healthy, Vector will exit during start-up. - - Can be alternatively set, and overridden by, the `--require-healthy` command-line flag. - """ - required: false +generated: configuration: { + configuration: { + api: { + type: object: options: { + address: { + type: string: { + default: "127.0.0.1:8686" + examples: ["0.0.0.0:8686", "127.0.0.1:1234"] + } + description: """ + The network address to which the API should bind. If you're running + Vector in a Docker container, bind to `0.0.0.0`. Otherwise + the API will not be exposed outside the container. + """ + common: true + required: false + } + enabled: { + type: bool: default: false + description: "Whether the GraphQL API is enabled for this Vector instance." 
+ common: true + required: false + } + graphql: { + type: bool: default: true + description: """ + Whether the endpoint for receiving and processing GraphQL queries is + enabled for the API. The endpoint is accessible via the `/graphql` + endpoint of the address set using the `bind` parameter. + """ + common: true + required: false + } + playground: { + type: bool: default: true + description: """ + Whether the [GraphQL Playground](https://github.com/graphql/graphql-playground) is enabled + for the API. The Playground is accessible via the `/playground` endpoint + of the address set using the `bind` parameter. Note that the `playground` + endpoint will only be enabled if the `graphql` endpoint is also enabled. + """ + common: false + required: false + } } + description: "API options." + warnings: ["The API currently does not support authentication. Only enable it in isolated environments or for debugging. It must not be exposed to untrusted clients."] + group: "api" } - description: "Healthcheck options." - } - enrichment_tables: { - type: object: options: { - file: { + enrichment_tables: { + type: object: options: "*": { type: object: options: { - encoding: { + graph: { + type: object: options: node_attributes: { + type: object: { + options: "*": { + type: string: {} + required: true + description: "A single graph node attribute in graphviz DOT language." + } + examples: [{ + color: "red" + name: "Example Node" + width: "5.0" + }] + } + description: """ + Node attributes to add to this component's node in resulting graph + + They are added to the node as provided + """ + required: false + } + description: """ + Extra graph configuration + + Configure output for component when generated with graph command + """ + required: false + } + inputs: { + type: array: { + items: type: string: examples: ["my-source-or-transform-id", "prefix-*"] + default: [] + } + description: """ + A list of upstream [source][sources] or [transform][transforms] IDs. 
+ + Wildcards (`*`) are supported. + + See [configuration][configuration] for more info. + + [sources]: https://vector.dev/docs/reference/configuration/sources/ + [transforms]: https://vector.dev/docs/reference/configuration/transforms/ + [configuration]: https://vector.dev/docs/reference/configuration/ + """ + required: false + } + file: { type: object: options: { - delimiter: { - type: string: default: "," - description: "The delimiter used to separate fields in each row of the CSV file." - required: false + encoding: { + type: object: options: { + delimiter: { + type: string: default: "," + description: "The delimiter used to separate fields in each row of the CSV file." + required: false + } + include_headers: { + type: bool: default: true + description: """ + Whether or not the file contains column headers. + + When set to `true`, the first row of the CSV file will be read as the header row, and + the values will be used for the names of each column. This is the default behavior. + + When set to `false`, columns are referred to by their numerical index. + """ + required: false + } + type: { + required: true + type: string: enum: csv: """ + Decodes the file as a [CSV][csv] (comma-separated values) file. + + [csv]: https://wikipedia.org/wiki/Comma-separated_values + """ + description: "File encoding type." + } + } + description: "File encoding configuration." + required: true } - include_headers: { - type: bool: default: true + path: { + type: string: {} description: """ - Whether or not the file contains column headers. + The path of the enrichment table file. - When set to `true`, the first row of the CSV file will be read as the header row, and - the values will be used for the names of each column. This is the default behavior. + Currently, only [CSV][csv] files are supported. - When set to `false`, columns are referred to by their numerical index. 
+ [csv]: https://en.wikipedia.org/wiki/Comma-separated_values """ - required: false - } - type: { required: true - type: string: enum: csv: """ - Decodes the file as a [CSV][csv] (comma-separated values) file. - - [csv]: https://wikipedia.org/wiki/Comma-separated_values - """ - description: "File encoding type." } } - description: "File encoding configuration." - required: true + description: "File-specific settings." + required: true + relevant_when: "type = \"file\"" } - path: { - type: string: {} + schema: { + type: object: options: "*": { + type: string: {} + required: true + description: "Represents mapped log field names and types." + } description: """ - The path of the enrichment table file. - - Currently, only [CSV][csv] files are supported. - - [csv]: https://en.wikipedia.org/wiki/Comma-separated_values + Key/value pairs representing mapped log field names and types. + + This is used to coerce log fields from strings into their proper types. The available types are listed in the `Types` list below. + + Timestamp coercions need to be prefaced with `timestamp|`, for example `"timestamp|%F"`. Timestamp specifiers can use either of the following: + + 1. One of the built-in-formats listed in the `Timestamp Formats` table below. + 2. The [time format specifiers][chrono_fmt] from Rust’s `chrono` library. 
+ + Types + + - **`bool`** + - **`string`** + - **`float`** + - **`integer`** + - **`date`** + - **`timestamp`** (see the table below for formats) + + Timestamp Formats + + | Format | Description | Example | + |----------------------|----------------------------------------------------------------------------------|----------------------------------| + | `%F %T` | `YYYY-MM-DD HH:MM:SS` | `2020-12-01 02:37:54` | + | `%v %T` | `DD-Mmm-YYYY HH:MM:SS` | `01-Dec-2020 02:37:54` | + | `%FT%T` | [ISO 8601][iso8601]/[RFC 3339][rfc3339], without time zone | `2020-12-01T02:37:54` | + | `%FT%TZ` | [ISO 8601][iso8601]/[RFC 3339][rfc3339], UTC | `2020-12-01T09:37:54Z` | + | `%+` | [ISO 8601][iso8601]/[RFC 3339][rfc3339], UTC, with time zone | `2020-12-01T02:37:54-07:00` | + | `%a, %d %b %Y %T` | [RFC 822][rfc822]/[RFC 2822][rfc2822], without time zone | `Tue, 01 Dec 2020 02:37:54` | + | `%a %b %e %T %Y` | [ctime][ctime] format | `Tue Dec 1 02:37:54 2020` | + | `%s` | [UNIX timestamp][unix_ts] | `1606790274` | + | `%a %d %b %T %Y` | [date][date] command, without time zone | `Tue 01 Dec 02:37:54 2020` | + | `%a %d %b %T %Z %Y` | [date][date] command, with time zone | `Tue 01 Dec 02:37:54 PST 2020` | + | `%a %d %b %T %z %Y` | [date][date] command, with numeric time zone | `Tue 01 Dec 02:37:54 -0700 2020` | + | `%a %d %b %T %#z %Y` | [date][date] command, with numeric time zone (minutes can be missing or present) | `Tue 01 Dec 02:37:54 -07 2020` | + + [date]: https://man7.org/linux/man-pages/man1/date.1.html + [ctime]: https://www.cplusplus.com/reference/ctime + [unix_ts]: https://en.wikipedia.org/wiki/Unix_time + [rfc822]: https://tools.ietf.org/html/rfc822#section-5 + [rfc2822]: https://tools.ietf.org/html/rfc2822#section-3.3 + [iso8601]: https://en.wikipedia.org/wiki/ISO_8601 + [rfc3339]: https://tools.ietf.org/html/rfc3339 + [chrono_fmt]: https://docs.rs/chrono/latest/chrono/format/strftime/index.html#specifiers """ - required: true + required: false + relevant_when: "type = 
\"file\"" } - } - description: "File-specific settings." - required: true - relevant_when: "type = \"file\"" - } - schema: { - type: object: options: "*": { - type: string: {} - required: true - description: "Represents mapped log field names and types." - } - description: """ - Key/value pairs representing mapped log field names and types. - - This is used to coerce log fields from strings into their proper types. The available types are listed in the `Types` list below. - - Timestamp coercions need to be prefaced with `timestamp|`, for example `"timestamp|%F"`. Timestamp specifiers can use either of the following: - - 1. One of the built-in-formats listed in the `Timestamp Formats` table below. - 2. The [time format specifiers][chrono_fmt] from Rust’s `chrono` library. - - Types - - - **`bool`** - - **`string`** - - **`float`** - - **`integer`** - - **`date`** - - **`timestamp`** (see the table below for formats) - - Timestamp Formats - - | Format | Description | Example | - |----------------------|----------------------------------------------------------------------------------|----------------------------------| - | `%F %T` | `YYYY-MM-DD HH:MM:SS` | `2020-12-01 02:37:54` | - | `%v %T` | `DD-Mmm-YYYY HH:MM:SS` | `01-Dec-2020 02:37:54` | - | `%FT%T` | [ISO 8601][iso8601]/[RFC 3339][rfc3339], without time zone | `2020-12-01T02:37:54` | - | `%FT%TZ` | [ISO 8601][iso8601]/[RFC 3339][rfc3339], UTC | `2020-12-01T09:37:54Z` | - | `%+` | [ISO 8601][iso8601]/[RFC 3339][rfc3339], UTC, with time zone | `2020-12-01T02:37:54-07:00` | - | `%a, %d %b %Y %T` | [RFC 822][rfc822]/[RFC 2822][rfc2822], without time zone | `Tue, 01 Dec 2020 02:37:54` | - | `%a %b %e %T %Y` | [ctime][ctime] format | `Tue Dec 1 02:37:54 2020` | - | `%s` | [UNIX timestamp][unix_ts] | `1606790274` | - | `%a %d %b %T %Y` | [date][date] command, without time zone | `Tue 01 Dec 02:37:54 2020` | - | `%a %d %b %T %Z %Y` | [date][date] command, with time zone | `Tue 01 Dec 02:37:54 PST 2020` | - | `%a %d %b 
%T %z %Y` | [date][date] command, with numeric time zone | `Tue 01 Dec 02:37:54 -0700 2020` | - | `%a %d %b %T %#z %Y` | [date][date] command, with numeric time zone (minutes can be missing or present) | `Tue 01 Dec 02:37:54 -07 2020` | - - [date]: https://man7.org/linux/man-pages/man1/date.1.html - [ctime]: https://www.cplusplus.com/reference/ctime - [unix_ts]: https://en.wikipedia.org/wiki/Unix_time - [rfc822]: https://tools.ietf.org/html/rfc822#section-5 - [rfc2822]: https://tools.ietf.org/html/rfc2822#section-3.3 - [iso8601]: https://en.wikipedia.org/wiki/ISO_8601 - [rfc3339]: https://tools.ietf.org/html/rfc3339 - [chrono_fmt]: https://docs.rs/chrono/latest/chrono/format/strftime/index.html#specifiers - """ - required: false - relevant_when: "type = \"file\"" - } - flush_interval: { - type: uint: {} - description: """ - The interval used for making writes visible in the table. - Longer intervals might get better performance, - but there is a longer delay before the data is visible in the table. - Since every TTL scan makes its changes visible, only use this value - if it is shorter than the `scan_interval`. - - By default, all writes are made visible immediately. - """ - required: false - relevant_when: "type = \"memory\"" - } - internal_metrics: { - type: object: options: include_key_tag: { - type: bool: default: false - description: """ - Determines whether to include the key tag on internal metrics. - - This is useful for distinguishing between different keys while monitoring. However, the tag's - cardinality is unbounded. - """ - required: false - } - description: "Configuration of internal metrics" - required: false - relevant_when: "type = \"memory\"" - } - max_byte_size: { - type: uint: {} - description: """ - Maximum size of the table in bytes. All insertions that make - this table bigger than the maximum size are rejected. + flush_interval: { + type: uint: {} + description: """ + The interval used for making writes visible in the table. 
+ Longer intervals might get better performance, + but there is a longer delay before the data is visible in the table. + Since every TTL scan makes its changes visible, only use this value + if it is shorter than the `scan_interval`. - By default, there is no size limit. - """ - required: false - relevant_when: "type = \"memory\"" - } - scan_interval: { - type: uint: default: 30 - description: """ - The scan interval used to look for expired records. This is provided - as an optimization to ensure that TTL is updated, but without doing - too many cache scans. - """ - required: false - relevant_when: "type = \"memory\"" - } - source_config: { - type: object: options: { - export_batch_size: { + By default, all writes are made visible immediately. + """ + required: false + relevant_when: "type = \"memory\"" + } + internal_metrics: { + type: object: options: include_key_tag: { + type: bool: default: false + description: """ + Determines whether to include the key tag on internal metrics. + + This is useful for distinguishing between different keys while monitoring. However, the tag's + cardinality is unbounded. + """ + required: false + } + description: "Configuration of internal metrics" + required: false + relevant_when: "type = \"memory\"" + } + max_byte_size: { type: uint: {} description: """ - Batch size for data exporting. Used to prevent exporting entire table at - once and blocking the system. + Maximum size of the table in bytes. All insertions that make + this table bigger than the maximum size are rejected. - By default, batches are not used and entire table is exported. + By default, there is no size limit. """ - required: false + required: false + relevant_when: "type = \"memory\"" } - export_expired_items: { - type: bool: default: false + scan_interval: { + type: uint: default: 30 description: """ - Set to true to export expired items via the `expired` output port. - Expired items ignore other settings and are exported as they are flushed from the table. 
+ The scan interval used to look for expired records. This is provided + as an optimization to ensure that TTL is updated, but without doing + too many cache scans. """ - required: false + required: false + relevant_when: "type = \"memory\"" } - export_interval: { - type: uint: {} - description: "Interval for exporting all data from the table when used as a source." - required: false + source_config: { + type: object: options: { + export_batch_size: { + type: uint: {} + description: """ + Batch size for data exporting. Used to prevent exporting entire table at + once and blocking the system. + + By default, batches are not used and entire table is exported. + """ + required: false + } + export_expired_items: { + type: bool: default: false + description: """ + Set to true to export expired items via the `expired` output port. + Expired items ignore other settings and are exported as they are flushed from the table. + """ + required: false + } + export_interval: { + type: uint: {} + description: "Interval for exporting all data from the table when used as a source." + required: false + } + remove_after_export: { + type: bool: default: false + description: """ + If set to true, all data will be removed from cache after exporting. + Only valid if used as a source and export_interval > 0 + + By default, export will not remove data from cache + """ + required: false + } + source_key: { + type: string: {} + description: """ + Key to use for this component when used as a source. This must be different from the + component key. + """ + required: true + } + } + description: "Configuration for source functionality." + required: false + relevant_when: "type = \"memory\"" } - remove_after_export: { - type: bool: default: false + ttl: { + type: uint: default: 600 description: """ - If set to true, all data will be removed from cache after exporting. 
- Only valid if used as a source and export_interval > 0 + TTL (time-to-live in seconds) is used to limit the lifetime of data stored in the cache. + When TTL expires, data behind a specific key in the cache is removed. + TTL is reset when the key is replaced. + """ + required: false + relevant_when: "type = \"memory\"" + } + ttl_field: { + type: string: default: "" + description: "Field in the incoming value used as the TTL override." + required: false + relevant_when: "type = \"memory\"" + } + locale: { + type: string: default: "en" + description: """ + The locale to use when querying the database. + + MaxMind includes localized versions of some of the fields within their database, such as + country name. This setting can control which of those localized versions are returned by the + transform. - By default, export will not remove data from cache + More information on which portions of the geolocation data are localized, and what languages + are available, can be found [here][locale_docs]. + + [locale_docs]: https://support.maxmind.com/hc/en-us/articles/4414877149467-IP-Geolocation-Data#h_01FRRGRYTGZB29ERDBZCX3MR8Q """ - required: false + required: false + relevant_when: "type = \"geoip\"" } - source_key: { + path: { type: string: {} description: """ - Key to use for this component when used as a source. This must be different from the - component key. + Path to the [MaxMind GeoIP2][geoip2] or [GeoLite2 binary city database file][geolite2] + (**GeoLite2-City.mmdb**). + + Other databases, such as the country database, are not supported. + `mmdb` enrichment table can be used for other databases. + + [geoip2]: https://dev.maxmind.com/geoip/geoip2/downloadable + [geolite2]: https://dev.maxmind.com/geoip/geoip2/geolite2/#Download_Access """ + required: true + relevant_when: "type = \"geoip\" or type = \"mmdb\"" + } + type: { required: true + type: string: enum: { + file: "Exposes data from a static file as an enrichment table." 
+ memory: """ + Exposes data from a memory cache as an enrichment table. The cache can be written to using + a sink. + """ + geoip: """ + Exposes data from a [MaxMind][maxmind] [GeoIP2][geoip2] database as an enrichment table. + + [maxmind]: https://www.maxmind.com/ + [geoip2]: https://www.maxmind.com/en/geoip2-databases + """ + mmdb: """ + Exposes data from a [MaxMind][maxmind] database as an enrichment table. + + [maxmind]: https://www.maxmind.com/ + """ + } + description: "enrichment table type" } } - description: "Configuration for source functionality." - required: false - relevant_when: "type = \"memory\"" - } - ttl: { - type: uint: default: 600 - description: """ - TTL (time-to-live in seconds) is used to limit the lifetime of data stored in the cache. - When TTL expires, data behind a specific key in the cache is removed. - TTL is reset when the key is replaced. - """ - required: false - relevant_when: "type = \"memory\"" - } - ttl_field: { - type: string: default: "" - description: "Field in the incoming value used as the TTL override." - required: false - relevant_when: "type = \"memory\"" + description: "An enrichment table." + required: true } - locale: { - type: string: default: "en" - description: """ - The locale to use when querying the database. + description: "All configured enrichment tables." + group: "pipeline_components" + } + healthchecks: { + type: object: options: { + enabled: { + type: bool: default: true + description: """ + Whether or not healthchecks are enabled for all sinks. - MaxMind includes localized versions of some of the fields within their database, such as - country name. This setting can control which of those localized versions are returned by the - transform. + Can be overridden on a per-sink basis. + """ + required: false + } + require_healthy: { + type: bool: default: false + description: """ + Whether or not to require a sink to report as being healthy during startup. 
- More information on which portions of the geolocation data are localized, and what languages - are available, can be found [here][locale_docs]. + When enabled and a sink reports not being healthy, Vector will exit during start-up. - [locale_docs]: https://support.maxmind.com/hc/en-us/articles/4414877149467-IP-Geolocation-Data#h_01FRRGRYTGZB29ERDBZCX3MR8Q - """ - required: false - relevant_when: "type = \"geoip\"" + Can be alternatively set, and overridden by, the `--require-healthy` command-line flag. + """ + required: false + } } - path: { - type: string: {} - description: """ - Path to the [MaxMind GeoIP2][geoip2] or [GeoLite2 binary city database file][geolite2] - (**GeoLite2-City.mmdb**). + description: "Healthcheck options." + group: "global_options" + } + schema: { + type: object: options: { + enabled: { + type: bool: default: false + description: """ + When enabled, Vector tracks the schema (field types and structure) of events as they flow + from sources through transforms to sinks. This allows Vector to understand what data each + component receives and produces. + """ + required: false + } + log_namespace: { + type: bool: {} + description: """ + Controls how metadata is stored in log events. - Other databases, such as the country database, are not supported. - `mmdb` enrichment table can be used for other databases. + When set to `false` (legacy mode), metadata fields like `host`, `timestamp`, and `source_type` + are stored as top-level fields alongside your log data. - [geoip2]: https://dev.maxmind.com/geoip/geoip2/downloadable - [geolite2]: https://dev.maxmind.com/geoip/geoip2/geolite2/#Download_Access - """ - required: true - relevant_when: "type = \"geoip\" or type = \"mmdb\"" - } - type: { - required: true - type: string: enum: { - file: "Exposes data from a static file as an enrichment table." - memory: """ - Exposes data from a memory cache as an enrichment table. The cache can be written to using - a sink. 
- """ - geoip: """ - Exposes data from a [MaxMind][maxmind] [GeoIP2][geoip2] database as an enrichment table. + When set to `true` (Vector namespace mode), metadata is stored in a separate metadata namespace, + keeping it distinct from your actual log data. - [maxmind]: https://www.maxmind.com/ - [geoip2]: https://www.maxmind.com/en/geoip2-databases + See the [Log Namespacing guide](/guides/level-up/log_namespace/) for detailed information + about when to use Vector namespace mode and how to migrate from legacy mode. """ - mmdb: """ - Exposes data from a [MaxMind][maxmind] database as an enrichment table. + required: false + } + validation: { + type: bool: default: false + description: """ + When enabled, Vector validates that events flowing into each sink match the schema + requirements of that sink. If a sink requires certain fields or types that are missing + from the incoming events, Vector will report an error during configuration validation. - [maxmind]: https://www.maxmind.com/ + This helps catch pipeline configuration errors early, before runtime. """ + required: false } - description: "enrichment table type" - } - } - description: """ - Configuration options for an [enrichment table](https://vector.dev/docs/reference/glossary/#enrichment-tables) to be used in a - [`remap`](https://vector.dev/docs/reference/configuration/transforms/remap/) transform. Currently supported are: - - * [CSV](https://en.wikipedia.org/wiki/Comma-separated_values) files - * [MaxMind](https://www.maxmind.com/en/home) databases - * In-memory storage - - For the lookup in the enrichment tables to be as performant as possible, the data is indexed according - to the fields that are used in the search. Note that indices can only be created for fields for which an - exact match is used in the condition. For range searches, an index isn't used and the enrichment table - drops back to a sequential scan of the data. 
A sequential scan shouldn't impact performance - significantly provided that there are only a few possible rows returned by the exact matches in the - condition. We don't recommend using a condition that uses only date range searches. - """ - common: false - required: false - } - secret: { - type: object: options: { - path: { - type: string: {} - description: "File path to read secrets from." - required: true - relevant_when: "type = \"file\" or type = \"directory\"" - } - remove_trailing_whitespace: { - type: bool: default: false - description: "Remove trailing whitespace from file contents." - required: false - relevant_when: "type = \"directory\"" } - command: { - type: array: items: type: string: {} - description: """ - Command arguments to execute. + description: """ + Schema options. - The path to the script or binary must be the first argument. - """ - required: true - relevant_when: "type = \"exec\"" - } - protocol: { + **Note:** The `enabled` and `validation` options are experimental and should only be enabled if you + understand the limitations. While the infrastructure exists for schema tracking and validation, the + full vision of automatic semantic field mapping and comprehensive schema enforcement was never fully + realized. + + If you encounter issues with these features, please [report them here](https://github.com/vectordotdev/vector/issues/new?template=bug.yml). + """ + group: "schema" + } + secret: { + type: object: options: "*": { type: object: options: { - backend_config: { - type: "*": {} - description: """ - The configuration to pass to the secrets executable. This is the `config` field in the - backend request. Refer to the documentation of your `backend_type `to see which options - are required to be set. - """ + path: { + type: string: {} + description: "File path to read secrets from." 
+ required: true + relevant_when: "type = \"file\" or type = \"directory\"" + } + remove_trailing_whitespace: { + type: bool: default: false + description: "Remove trailing whitespace from file contents." required: false - relevant_when: "version = \"v1_1\"" + relevant_when: "type = \"directory\"" } - backend_type: { - type: string: {} - description: "The name of the backend. This is `type` field in the backend request." + command: { + type: array: items: type: string: {} + description: """ + Command arguments to execute. + + The path to the script or binary must be the first argument. + """ required: true - relevant_when: "version = \"v1_1\"" + relevant_when: "type = \"exec\"" } - version: { - required: false - type: string: { - enum: { - v1: "Expect the command to fetch the configuration options itself." - v1_1: "Configuration options to the command are to be curried upon each request." + protocol: { + type: object: options: { + backend_config: { + type: "*": {} + description: """ + The configuration to pass to the secrets executable. This is the `config` field in the + backend request. Refer to the documentation of your `backend_type `to see which options + are required to be set. + """ + required: false + relevant_when: "version = \"v1_1\"" + } + backend_type: { + type: string: {} + description: "The name of the backend. This is `type` field in the backend request." + required: true + relevant_when: "version = \"v1_1\"" + } + version: { + required: false + type: string: { + enum: { + v1: "Expect the command to fetch the configuration options itself." + v1_1: "Configuration options to the command are to be curried upon each request." + } + default: "v1" + } + description: "The protocol version." } - default: "v1" } - description: "The protocol version." + description: "Settings for the protocol between Vector and the secrets executable." 
+ required: false + relevant_when: "type = \"exec\"" } - } - description: "Settings for the protocol between Vector and the secrets executable." - required: false - relevant_when: "type = \"exec\"" - } - timeout: { - type: uint: default: 5 - description: "The timeout, in seconds, to wait for the command to complete." - required: false - relevant_when: "type = \"exec\"" - } - auth: { - type: object: options: { - access_key_id: { - type: string: examples: ["AKIAIOSFODNN7EXAMPLE"] - description: "The AWS access key ID." - required: true + timeout: { + type: uint: default: 5 + description: "The timeout, in seconds, to wait for the command to complete." + required: false + relevant_when: "type = \"exec\"" } - assume_role: { - type: string: examples: ["arn:aws:iam::123456789098:role/my_role"] - description: """ - The ARN of an [IAM role][iam_role] to assume. + auth: { + type: object: options: { + access_key_id: { + type: string: examples: ["AKIAIOSFODNN7EXAMPLE"] + description: "The AWS access key ID." + required: true + } + assume_role: { + type: string: examples: ["arn:aws:iam::123456789098:role/my_role"] + description: """ + The ARN of an [IAM role][iam_role] to assume. - [iam_role]: https://docs.aws.amazon.com/IAM/latest/UserGuide/id_roles.html - """ - required: true + [iam_role]: https://docs.aws.amazon.com/IAM/latest/UserGuide/id_roles.html + """ + required: true + } + external_id: { + type: string: examples: ["randomEXAMPLEidString"] + description: """ + The optional unique external ID in conjunction with role to assume. + + [external_id]: https://docs.aws.amazon.com/IAM/latest/UserGuide/id_roles_create_for-user_externalid.html + """ + required: false + } + region: { + type: string: examples: ["us-west-2"] + description: """ + The [AWS region][aws_region] to send STS requests to. + + If not set, this defaults to the configured region + for the service itself. 
+ + [aws_region]: https://docs.aws.amazon.com/general/latest/gr/rande.html#regional-endpoints + """ + required: false + } + secret_access_key: { + type: string: examples: ["wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY"] + description: "The AWS secret access key." + required: true + } + session_name: { + type: string: examples: ["vector-indexer-role"] + description: """ + The optional [RoleSessionName][role_session_name] is a unique session identifier for your assumed role. + + Should be unique per principal or reason. + If not set, the session name is autogenerated like assume-role-provider-1736428351340 + + [role_session_name]: https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRole.html + """ + required: false + } + session_token: { + type: string: examples: ["AQoDYXdz...AQoDYXdz..."] + description: """ + The AWS session token. + See [AWS temporary credentials](https://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_temp_use-resources.html) + """ + required: false + } + credentials_file: { + type: string: examples: ["/my/aws/credentials"] + description: "Path to the credentials file." + required: true + } + profile: { + type: string: { + default: "default" + examples: ["develop"] + } + description: """ + The credentials profile to use. + + Used to select AWS credentials from a provided credentials file. + """ + required: false + } + imds: { + type: object: options: { + connect_timeout_seconds: { + type: uint: { + default: 1 + unit: "seconds" + } + description: "Connect timeout for IMDS." + required: false + } + max_attempts: { + type: uint: default: 4 + description: "Number of IMDS retries for fetching tokens and metadata." + required: false + } + read_timeout_seconds: { + type: uint: { + default: 1 + unit: "seconds" + } + description: "Read timeout for IMDS." + required: false + } + } + description: "Configuration for authenticating with AWS through IMDS." 
+ required: false + } + load_timeout_secs: { + type: uint: { + examples: [30] + unit: "seconds" + } + description: """ + Timeout for successfully loading any credentials, in seconds. + + Relevant when the default credentials chain or `assume_role` is used. + """ + required: false + } + } + description: "Configuration of the authentication strategy for interacting with AWS services." + required: false + relevant_when: "type = \"aws_secrets_manager\"" } - external_id: { - type: string: examples: ["randomEXAMPLEidString"] - description: """ - The optional unique external ID in conjunction with role to assume. + secret_id: { + type: string: {} + description: "ID of the secret to resolve." + required: true + relevant_when: "type = \"aws_secrets_manager\"" + } + tls: { + type: object: options: { + alpn_protocols: { + type: array: items: type: string: examples: ["h2"] + description: """ + Sets the list of supported ALPN protocols. - [external_id]: https://docs.aws.amazon.com/IAM/latest/UserGuide/id_roles_create_for-user_externalid.html - """ - required: false + Declare the supported ALPN protocols, which are used during negotiation with a peer. They are prioritized in the order + that they are defined. + """ + required: false + } + ca_file: { + type: string: examples: ["/path/to/certificate_authority.crt"] + description: """ + Absolute path to an additional CA certificate file. + + The certificate must be in the DER or PEM (X.509) format. Additionally, the certificate can be provided as an inline string in PEM format. + """ + required: false + } + crt_file: { + type: string: examples: ["/path/to/host_certificate.crt"] + description: """ + Absolute path to a certificate file used to identify this server. + + The certificate must be in DER, PEM (X.509), or PKCS#12 format. Additionally, the certificate can be provided as + an inline string in PEM format. + + If this is set _and_ is not a PKCS#12 archive, `key_file` must also be set. 
+ """ + required: false + } + key_file: { + type: string: examples: ["/path/to/host_certificate.key"] + description: """ + Absolute path to a private key file used to identify this server. + + The key must be in DER or PEM (PKCS#8) format. Additionally, the key can be provided as an inline string in PEM format. + """ + required: false + } + key_pass: { + type: string: examples: ["${KEY_PASS_ENV_VAR}", "PassWord1"] + description: """ + Passphrase used to unlock the encrypted key file. + + This has no effect unless `key_file` is set. + """ + required: false + } + server_name: { + type: string: examples: ["www.example.com"] + description: """ + Server name to use when using Server Name Indication (SNI). + + Only relevant for outgoing connections. + """ + required: false + } + verify_certificate: { + type: bool: {} + description: """ + Enables certificate verification. For components that create a server, this requires that the + client connections have a valid client certificate. For components that initiate requests, + this validates that the upstream has a valid certificate. + + If enabled, certificates must not be expired and must be issued by a trusted + issuer. This verification operates in a hierarchical manner, checking that the leaf certificate (the + certificate presented by the client/server) is not only valid, but that the issuer of that certificate is also valid, and + so on, until the verification process reaches a root certificate. + + Do NOT set this to `false` unless you understand the risks of not verifying the validity of certificates. + """ + required: false + } + verify_hostname: { + type: bool: {} + description: """ + Enables hostname verification. + + If enabled, the hostname used to connect to the remote host must be present in the TLS certificate presented by + the remote host, either as the Common Name or as an entry in the Subject Alternative Name extension. + + Only relevant for outgoing connections. 
+ + Do NOT set this to `false` unless you understand the risks of not verifying the remote hostname. + """ + required: false + } + } + description: "TLS configuration." + required: false + relevant_when: "type = \"aws_secrets_manager\"" + } + endpoint: { + type: string: examples: ["http://127.0.0.0:5000/path/to/service"] + description: "Custom endpoint for use with AWS-compatible services." + required: false + relevant_when: "type = \"aws_secrets_manager\"" } region: { - type: string: examples: ["us-west-2"] + type: string: examples: ["us-east-1"] description: """ - The [AWS region][aws_region] to send STS requests to. - - If not set, this defaults to the configured region - for the service itself. + The [AWS region][aws_region] of the target service. [aws_region]: https://docs.aws.amazon.com/general/latest/gr/rande.html#regional-endpoints """ - required: false + required: false + relevant_when: "type = \"aws_secrets_manager\"" } - secret_access_key: { - type: string: examples: ["wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY"] - description: "The AWS secret access key." - required: true + type: { + required: true + type: string: enum: { + file: "File." + directory: "Directory." + exec: "Exec." + aws_secrets_manager: "AWS Secrets Manager." + } + description: "secret type" } - session_name: { - type: string: examples: ["vector-indexer-role"] - description: """ - The optional [RoleSessionName][role_session_name] is a unique session identifier for your assumed role. + } + description: "A secret backend." + common: false + required: true + } + description: "All configured secrets backends." + group: "secrets" + } + sinks: { + type: object: options: "*": { + type: object: options: { + buffer: { + type: object: options: { + when_full: { + type: string: { + enum: { + block: """ + Wait for free space in the buffer. + + This applies backpressure up the topology, signalling that sources should slow down + the acceptance/consumption of events. 
This means that while no data is lost, data will pile + up at the edge. + """ + drop_newest: """ + Drops the event instead of waiting for free space in buffer. - Should be unique per principal or reason. - If not set, the session name is autogenerated like assume-role-provider-1736428351340 + The event will be intentionally dropped. This mode is typically used when performance is the + highest priority, and it is preferable to temporarily lose events rather than cause a + slowdown in the acceptance/consumption of events. + """ + } + default: "block" + } + description: "Event handling behavior when a buffer is full." + required: false + } + max_events: { + type: uint: default: 500 + required: false + description: "The maximum number of events allowed in the buffer." + relevant_when: "type = \"memory\"" + } + max_size: { + type: uint: unit: "bytes" + required: true + description: """ + The maximum allowed amount of allocated memory the buffer can hold. - [role_session_name]: https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRole.html - """ - required: false - } - session_token: { - type: string: examples: ["AQoDYXdz...AQoDYXdz..."] + If `type = "disk"` then must be at least ~256 megabytes (268435488 bytes). + """ + } + type: { + required: false + type: string: { + enum: { + memory: """ + Events are buffered in memory. + + This is more performant, but less durable. Data will be lost if Vector is restarted + forcefully or crashes. + """ + disk: """ + Events are buffered on disk. + + This is less performant, but more durable. Data that has been synchronized to disk will not + be lost if Vector is restarted forcefully or crashes. + + Data is synchronized to disk every 500ms. + """ + } + default: "memory" + } + description: "The type of buffer to use." + } + } description: """ - The AWS session token. 
- See [AWS temporary credentials](https://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_temp_use-resources.html) + Configures the buffering behavior for this sink. + + More information about the individual buffer types, and buffer behavior, can be found in the + [Buffering Model][buffering_model] section. + + [buffering_model]: /docs/architecture/buffering-model/ """ required: false } - credentials_file: { - type: string: examples: ["/my/aws/credentials"] - description: "Path to the credentials file." - required: true - } - profile: { - type: string: { - default: "default" - examples: ["develop"] + graph: { + type: object: options: node_attributes: { + type: object: { + options: "*": { + type: string: {} + required: true + description: "A single graph node attribute in graphviz DOT language." + } + examples: [{ + color: "red" + name: "Example Node" + width: "5.0" + }] + } + description: """ + Node attributes to add to this component's node in resulting graph + + They are added to the node as provided + """ + required: false } description: """ - The credentials profile to use. + Extra graph configuration - Used to select AWS credentials from a provided credentials file. + Configure output for component when generated with graph command """ required: false } - imds: { + healthcheck: { type: object: options: { - connect_timeout_seconds: { - type: uint: { - default: 1 + enabled: { + type: bool: default: true + description: "Whether or not to check the health of the sink when Vector starts up." + required: false + } + timeout: { + type: float: { + default: 10.0 unit: "seconds" } - description: "Connect timeout for IMDS." + description: "Timeout duration for healthcheck in seconds." required: false } - max_attempts: { - type: uint: default: 4 - description: "Number of IMDS retries for fetching tokens and metadata." + uri: { + type: string: {} + description: """ + The full URI to make HTTP healthcheck requests to. 
+ + This must be a valid URI, which requires at least the scheme and host. All other + components -- port, path, etc -- are allowed as well. + """ + required: false + } + } + description: "Healthcheck configuration." + required: false + } + inputs: { + type: array: items: type: string: examples: ["my-source-or-transform-id", "prefix-*"] + description: """ + A list of upstream [source][sources] or [transform][transforms] IDs. + + Wildcards (`*`) are supported. + + See [configuration][configuration] for more info. + + [sources]: https://vector.dev/docs/reference/configuration/sources/ + [transforms]: https://vector.dev/docs/reference/configuration/transforms/ + [configuration]: https://vector.dev/docs/reference/configuration/ + """ + required: true + } + proxy: { + type: object: options: { + enabled: { + type: bool: default: true + description: "Enables proxying support." required: false } - read_timeout_seconds: { - type: uint: { - default: 1 - unit: "seconds" + http: { + type: string: examples: ["http://foo.bar:3128"] + description: """ + Proxy endpoint to use when proxying HTTP traffic. + + Must be a valid URI string. + """ + required: false + } + https: { + type: string: examples: ["http://foo.bar:3128"] + description: """ + Proxy endpoint to use when proxying HTTPS traffic. + + Must be a valid URI string. + """ + required: false + } + no_proxy: { + type: array: { + items: type: string: examples: ["localhost", ".foo.bar", "*"] + default: [] } - description: "Read timeout for IMDS." - required: false + description: """ + A list of hosts to avoid proxying. 
+ + Multiple patterns are allowed: + + | Pattern | Example match | + | ------------------- | --------------------------------------------------------------------------- | + | Domain names | `example.com` matches requests to `example.com` | + | Wildcard domains | `.example.com` matches requests to `example.com` and its subdomains | + | IP addresses | `127.0.0.1` matches requests to `127.0.0.1` | + | [CIDR][cidr] blocks | `192.168.0.0/16` matches requests to any IP addresses in this range | + | Splat | `*` matches all hosts | + + [cidr]: https://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing + """ + required: false } } - description: "Configuration for authenticating with AWS through IMDS." - required: false - } - load_timeout_secs: { - type: uint: { - examples: [30] - unit: "seconds" - } description: """ - Timeout for successfully loading any credentials, in seconds. + Proxy configuration. + + Configure to proxy traffic through an HTTP(S) proxy when making external requests. - Relevant when the default credentials chain or `assume_role` is used. + Similar to common proxy configuration convention, you can set different proxies + to use based on the type of traffic being proxied. You can also set specific hosts that + should not be proxied. """ required: false } } - description: "Configuration of the authentication strategy for interacting with AWS services." - required: false - relevant_when: "type = \"aws_secrets_manager\"" + description: "A sink." + required: true } - secret_id: { - type: string: {} - description: "ID of the secret to resolve." - required: true - relevant_when: "type = \"aws_secrets_manager\"" - } - tls: { + description: "All configured sinks." + group: "pipeline_components" + } + sources: { + type: object: options: "*": { type: object: options: { - alpn_protocols: { - type: array: items: type: string: examples: ["h2"] - description: """ - Sets the list of supported ALPN protocols. 
+ graph: { + type: object: options: node_attributes: { + type: object: { + options: "*": { + type: string: {} + required: true + description: "A single graph node attribute in graphviz DOT language." + } + examples: [{ + color: "red" + name: "Example Node" + width: "5.0" + }] + } + description: """ + Node attributes to add to this component's node in resulting graph - Declare the supported ALPN protocols, which are used during negotiation with a peer. They are prioritized in the order - that they are defined. - """ - required: false - } - ca_file: { - type: string: examples: ["/path/to/certificate_authority.crt"] + They are added to the node as provided + """ + required: false + } description: """ - Absolute path to an additional CA certificate file. + Extra graph configuration - The certificate must be in the DER or PEM (X.509) format. Additionally, the certificate can be provided as an inline string in PEM format. + Configure output for component when generated with graph command """ required: false } - crt_file: { - type: string: examples: ["/path/to/host_certificate.crt"] - description: """ - Absolute path to a certificate file used to identify this server. + proxy: { + type: object: options: { + enabled: { + type: bool: default: true + description: "Enables proxying support." + required: false + } + http: { + type: string: examples: ["http://foo.bar:3128"] + description: """ + Proxy endpoint to use when proxying HTTP traffic. - The certificate must be in DER, PEM (X.509), or PKCS#12 format. Additionally, the certificate can be provided as - an inline string in PEM format. + Must be a valid URI string. + """ + required: false + } + https: { + type: string: examples: ["http://foo.bar:3128"] + description: """ + Proxy endpoint to use when proxying HTTPS traffic. - If this is set _and_ is not a PKCS#12 archive, `key_file` must also be set. 
- """ - required: false - } - key_file: { - type: string: examples: ["/path/to/host_certificate.key"] - description: """ - Absolute path to a private key file used to identify this server. + Must be a valid URI string. + """ + required: false + } + no_proxy: { + type: array: { + items: type: string: examples: ["localhost", ".foo.bar", "*"] + default: [] + } + description: """ + A list of hosts to avoid proxying. - The key must be in DER or PEM (PKCS#8) format. Additionally, the key can be provided as an inline string in PEM format. - """ - required: false - } - key_pass: { - type: string: examples: ["${KEY_PASS_ENV_VAR}", "PassWord1"] - description: """ - Passphrase used to unlock the encrypted key file. + Multiple patterns are allowed: - This has no effect unless `key_file` is set. - """ - required: false - } - server_name: { - type: string: examples: ["www.example.com"] + | Pattern | Example match | + | ------------------- | --------------------------------------------------------------------------- | + | Domain names | `example.com` matches requests to `example.com` | + | Wildcard domains | `.example.com` matches requests to `example.com` and its subdomains | + | IP addresses | `127.0.0.1` matches requests to `127.0.0.1` | + | [CIDR][cidr] blocks | `192.168.0.0/16` matches requests to any IP addresses in this range | + | Splat | `*` matches all hosts | + + [cidr]: https://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing + """ + required: false + } + } description: """ - Server name to use when using Server Name Indication (SNI). + Proxy configuration. + + Configure to proxy traffic through an HTTP(S) proxy when making external requests. - Only relevant for outgoing connections. + Similar to common proxy configuration convention, you can set different proxies + to use based on the type of traffic being proxied. You can also set specific hosts that + should not be proxied. 
""" required: false } - verify_certificate: { - type: bool: {} - description: """ - Enables certificate verification. For components that create a server, this requires that the - client connections have a valid client certificate. For components that initiate requests, - this validates that the upstream has a valid certificate. + } + description: "A source." + required: true + } + description: "All configured sources." + group: "pipeline_components" + } + transforms: { + type: object: options: "*": { + type: object: options: { + graph: { + type: object: options: node_attributes: { + type: object: { + options: "*": { + type: string: {} + required: true + description: "A single graph node attribute in graphviz DOT language." + } + examples: [{ + color: "red" + name: "Example Node" + width: "5.0" + }] + } + description: """ + Node attributes to add to this component's node in resulting graph - If enabled, certificates must not be expired and must be issued by a trusted - issuer. This verification operates in a hierarchical manner, checking that the leaf certificate (the - certificate presented by the client/server) is not only valid, but that the issuer of that certificate is also valid, and - so on, until the verification process reaches a root certificate. + They are added to the node as provided + """ + required: false + } + description: """ + Extra graph configuration - Do NOT set this to `false` unless you understand the risks of not verifying the validity of certificates. + Configure output for component when generated with graph command """ required: false } - verify_hostname: { - type: bool: {} + inputs: { + type: array: items: type: string: examples: ["my-source-or-transform-id", "prefix-*"] description: """ - Enables hostname verification. + A list of upstream [source][sources] or [transform][transforms] IDs. 
- If enabled, the hostname used to connect to the remote host must be present in the TLS certificate presented by - the remote host, either as the Common Name or as an entry in the Subject Alternative Name extension. + Wildcards (`*`) are supported. - Only relevant for outgoing connections. + See [configuration][configuration] for more info. - Do NOT set this to `false` unless you understand the risks of not verifying the remote hostname. + [sources]: https://vector.dev/docs/reference/configuration/sources/ + [transforms]: https://vector.dev/docs/reference/configuration/transforms/ + [configuration]: https://vector.dev/docs/reference/configuration/ """ - required: false + required: true } } - description: "TLS configuration." - required: false - relevant_when: "type = \"aws_secrets_manager\"" + description: "A transform." + required: true } - endpoint: { - type: string: examples: ["http://127.0.0.0:5000/path/to/service"] - description: "Custom endpoint for use with AWS-compatible services." - required: false - relevant_when: "type = \"aws_secrets_manager\"" - } - region: { - type: string: examples: ["us-east-1"] + description: "All configured transforms." + group: "pipeline_components" + } + acknowledgements: { + type: object: options: enabled: { + type: bool: {} description: """ - The [AWS region][aws_region] of the target service. + Controls whether or not end-to-end acknowledgements are enabled. - [aws_region]: https://docs.aws.amazon.com/general/latest/gr/rande.html#regional-endpoints + When enabled for a sink, any source that supports end-to-end + acknowledgements that is connected to that sink waits for events + to be acknowledged by **all connected sinks** before acknowledging them at the source. + + Enabling or disabling acknowledgements at the sink level takes precedence over any global + [`acknowledgements`][global_acks] configuration. 
+ + [global_acks]: https://vector.dev/docs/reference/configuration/global-options/#acknowledgements """ - required: false - relevant_when: "type = \"aws_secrets_manager\"" - } - type: { - required: true - type: string: enum: { - file: "File." - directory: "Directory." - exec: "Exec." - aws_secrets_manager: "AWS Secrets Manager." - } - description: "secret type" + required: false } - } - description: """ - Configuration options to retrieve secrets from external backend in order to avoid storing secrets in plaintext - in Vector config. Multiple backends can be configured. Use `SECRET[.]` to tell Vector to retrieve the secret. This placeholder is replaced by the secret - retrieved from the relevant backend. + description: """ + Controls how acknowledgements are handled for all sinks by default. - When `type` is `exec`, the provided command will be run and provided a list of - secrets to fetch, determined from the configuration file, on stdin as JSON in the format: + See [End-to-end Acknowledgements][e2e_acks] for more information on how Vector handles event + acknowledgement. - ```json - {"version": "1.0", "secrets": ["secret1", "secret2"]} - ``` + [e2e_acks]: https://vector.dev/docs/architecture/end-to-end-acknowledgements/ + """ + common: true + required: false + group: "global_options" + } + buffer_utilization_ewma_half_life_seconds: { + type: float: {} + description: """ + The half-life, in seconds, for the exponential weighted moving average (EWMA) of source + and transform buffer utilization metrics. - The executable is expected to respond with the values of these secrets on stdout, also as JSON, in the format: + This controls how quickly the `*_buffer_utilization_mean` gauges respond to new + observations. Longer half-lives retain more of the previous value, leading to slower + adjustments. 
- ```json - { - "secret1": {"value": "secret_value", "error": null}, - "secret2": {"value": null, "error": "could not fetch the secret"} - } - ``` - If an `error` is returned for any secrets, or if the command exits with a non-zero status code, - Vector will log the errors and exit. - - Otherwise, the secret must be a JSON text string with key/value pairs. For example: - ```json - { - "username": "test", - "password": "example-password" - } - ``` + - Lower values (< 1): Metrics update quickly but may be volatile + - Default (5): Balanced between responsiveness and stability + - Higher values (> 5): Smooth, stable metrics that update slowly - If an error occurred while reading the file or retrieving the secrets, Vector logs the error and exits. + Adjust based on whether you need fast detection of buffer issues (lower) + or want to see sustained trends without noise (higher). - Secrets are loaded when Vector starts or if Vector receives a `SIGHUP` signal triggering its - configuration reload process. - """ - common: false - required: false - } - acknowledgements: { - common: true - description: """ - Controls how acknowledgements are handled for all sinks by default. - - See [End-to-end Acknowledgements][e2e_acks] for more information on how Vector handles event - acknowledgement. - - [e2e_acks]: https://vector.dev/docs/architecture/end-to-end-acknowledgements/ - """ - required: false - type: object: options: enabled: { + Must be greater than 0. + """ + group: "global_options" + } + data_dir: { + type: string: default: "/var/lib/vector/" description: """ - Controls whether or not end-to-end acknowledgements are enabled. + The directory used for persisting Vector state data. - When enabled for a sink, any source that supports end-to-end - acknowledgements that is connected to that sink waits for events - to be acknowledged by **all connected sinks** before acknowledging them at the source. 
+ This is the directory where Vector will store any state data, such as disk buffers, file + checkpoints, and more. - Enabling or disabling acknowledgements at the sink level takes precedence over any global - [`acknowledgements`][global_acks] configuration. - - [global_acks]: https://vector.dev/docs/reference/configuration/global-options/#acknowledgements + Vector must have write permissions to this directory. """ - required: false - type: bool: {} + common: false + group: "global_options" } - } - buffer_utilization_ewma_half_life_seconds: { - description: """ - The half-life, in seconds, for the exponential weighted moving average (EWMA) of source - and transform buffer utilization metrics. - - This controls how quickly the `*_buffer_utilization_mean` gauges respond to new - observations. Longer half-lives retain more of the previous value, leading to slower - adjustments. - - - Lower values (< 1): Metrics update quickly but may be volatile - - Default (5): Balanced between responsiveness and stability - - Higher values (> 5): Smooth, stable metrics that update slowly - - Adjust based on whether you need fast detection of buffer issues (lower) - or want to see sustained trends without noise (higher). - - Must be greater than 0. - """ - required: false - type: float: {} - } - data_dir: { - common: false - description: """ - The directory used for persisting Vector state data. - - This is the directory where Vector will store any state data, such as disk buffers, file - checkpoints, and more. - - Vector must have write permissions to this directory. - """ - required: false - type: string: default: "/var/lib/vector/" - } - expire_metrics_per_metric_set: { - description: """ - This allows configuring different expiration intervals for different metric sets. - By default this is empty and any metric not matched by one of these sets will use - the global default value, defined using `expire_metrics_secs`. 
- """ - required: false - type: array: items: type: object: options: { - expire_secs: { - description: """ - The amount of time, in seconds, that internal metrics will persist after having not been - updated before they expire and are removed. + expire_metrics_per_metric_set: { + type: array: items: type: object: options: { + expire_secs: { + type: float: examples: [60.0] + description: """ + The amount of time, in seconds, that internal metrics will persist after having not been + updated before they expire and are removed. - Set this to a value larger than your `internal_metrics` scrape interval (default 5 minutes) - so that metrics live long enough to be emitted and captured. - """ - required: true - type: float: examples: [60.0] - } - labels: { - description: "Labels to apply this expiration to. Ignores labels if not defined." - required: false - type: object: options: { - matchers: { - description: "List of matchers to check." - required: true - type: array: items: type: object: options: { - key: { - description: "Metric key to look for." - required: true - type: string: {} - } - type: { - description: "Metric label matcher type." - required: true - type: string: enum: { - exact: "Looks for an exact match of one label key value pair." - regex: "Compares label value with given key to the provided pattern." + Set this to a value larger than your `internal_metrics` scrape interval (default 5 minutes) + so that metrics live long enough to be emitted and captured. + """ + required: true + } + labels: { + type: object: options: { + matchers: { + type: array: items: type: object: options: { + key: { + type: string: {} + description: "Metric key to look for." + required: true + } + value: { + type: string: {} + description: "The exact metric label value." + required: true + relevant_when: "type = \"exact\"" + } + value_pattern: { + type: string: {} + description: "Pattern to compare metric label value to." 
+ required: true + relevant_when: "type = \"regex\"" + } + type: { + required: true + type: string: enum: { + exact: "Looks for an exact match of one label key value pair." + regex: "Compares label value with given key to the provided pattern." + } + description: "Metric label matcher type." } } - value: { - description: "The exact metric label value." - relevant_when: "type = \"exact\"" - required: true - type: string: {} - } - value_pattern: { - description: "Pattern to compare metric label value to." - relevant_when: "type = \"regex\"" - required: true - type: string: {} - } + description: "List of matchers to check." + required: true } - } - type: { - description: "Metric label group matcher type." - required: true - type: string: enum: { - all: "Checks that all of the provided matchers can be applied to given metric." - any: "Checks that any of the provided matchers can be applied to given metric." + type: { + required: true + type: string: enum: { + any: "Checks that any of the provided matchers can be applied to given metric." + all: "Checks that all of the provided matchers can be applied to given metric." + } + description: "Metric label group matcher type." } } + description: "Labels to apply this expiration to. Ignores labels if not defined." + required: false } - } - name: { - description: "Metric name to apply this expiration to. Ignores metric name if not defined." - required: false - type: object: options: { - pattern: { - description: "Pattern to compare to." - relevant_when: "type = \"regex\"" - required: true - type: string: {} - } - type: { - description: "Metric name matcher type." - required: true - type: string: enum: { - exact: "Only considers exact name matches." - regex: "Compares metric name to the provided pattern." + name: { + type: object: options: { + value: { + type: string: {} + description: "The exact metric name." 
+ required: true + relevant_when: "type = \"exact\"" + } + pattern: { + type: string: {} + description: "Pattern to compare to." + required: true + relevant_when: "type = \"regex\"" + } + type: { + required: true + type: string: enum: { + exact: "Only considers exact name matches." + regex: "Compares metric name to the provided pattern." + } + description: "Metric name matcher type." } } - value: { - description: "The exact metric name." - relevant_when: "type = \"exact\"" - required: true - type: string: {} - } + description: "Metric name to apply this expiration to. Ignores metric name if not defined." + required: false } } + description: """ + This allows configuring different expiration intervals for different metric sets. + By default this is empty and any metric not matched by one of these sets will use + the global default value, defined using `expire_metrics_secs`. + """ + group: "global_options" } - } - expire_metrics_secs: { - common: false - description: """ - The amount of time, in seconds, that internal metrics will persist after having not been - updated before they expire and are removed. - - Set this to a value larger than your `internal_metrics` scrape interval (default 5 minutes) - so metrics live long enough to be emitted and captured. - """ - required: false - type: float: {} - } - latency_ewma_alpha: { - description: """ - The alpha value for the exponential weighted moving average (EWMA) of transform latency - metrics. - - This controls how quickly the `component_latency_mean_seconds` gauge responds to new - observations. Values closer to 1.0 retain more of the previous value, leading to slower - adjustments. The default value of 0.9 is equivalent to a "half life" of 6-7 measurements. - - Must be between 0 and 1 exclusively (0 < alpha < 1). - """ - required: false - type: float: {} - } - log_schema: { - common: false - description: """ - Default log schema for all events. 
- - This is used if a component does not have its own specific log schema. All events use a log - schema, whether or not the default is used, to assign event fields on incoming events. - """ - required: false - type: object: options: { - host_key: { - description: """ - The name of the event field to treat as the host which sent the message. - - This field will generally represent a real host, or container, that generated the message, - but is somewhat source-dependent. - """ - required: false - type: string: default: ".host" - } - message_key: { - description: """ - The name of the event field to treat as the event message. + expire_metrics_secs: { + type: float: {} + description: """ + The amount of time, in seconds, that internal metrics will persist after having not been + updated before they expire and are removed. - This would be the field that holds the raw message, such as a raw log line. - """ - required: false - type: string: default: ".message" - } - metadata_key: { - description: """ - The name of the event field to set the event metadata in. + Set this to a value larger than your `internal_metrics` scrape interval (default 5 minutes) + so metrics live long enough to be emitted and captured. + """ + common: false + required: false + group: "global_options" + } + latency_ewma_alpha: { + type: float: {} + description: """ + The alpha value for the exponential weighted moving average (EWMA) of transform latency + metrics. - Generally, this field will be set by Vector to hold event-specific metadata, such as - annotations by the `remap` transform when an error or abort is encountered. - """ - required: false - type: string: default: ".metadata" - } - source_type_key: { - description: """ - The name of the event field to set the source identifier in. + This controls how quickly the `component_latency_mean_seconds` gauge responds to new + observations. Values closer to 1.0 retain more of the previous value, leading to slower + adjustments. 
The default value of 0.9 is equivalent to a "half life" of 6-7 measurements. - This field will be set by the Vector source that the event was created in. - """ - required: false - type: string: default: ".source_type" - } - timestamp_key: { - description: "The name of the event field to treat as the event timestamp." - required: false - type: string: default: ".timestamp" - } + Must be between 0 and 1 exclusively (0 < alpha < 1). + """ + group: "global_options" } - } - metrics_storage_refresh_period: { - description: """ - The interval, in seconds, at which the internal metrics cache for VRL is refreshed. - This must be set to be able to access metrics in VRL functions. - - Higher values lead to stale metric values from `get_vector_metric`, - `find_vector_metrics`, and `aggregate_vector_metrics` functions. - """ - required: false - type: float: {} - } - proxy: { - common: false - description: """ - Proxy configuration. - - Configure to proxy traffic through an HTTP(S) proxy when making external requests. - - Similar to common proxy configuration convention, you can set different proxies - to use based on the type of traffic being proxied. You can also set specific hosts that - should not be proxied. - """ - required: false - type: object: options: { - enabled: { - description: "Enables proxying support." - required: false - type: bool: default: true - } - http: { - description: """ - Proxy endpoint to use when proxying HTTP traffic. - - Must be a valid URI string. - """ - required: false - type: string: examples: ["http://foo.bar:3128"] - } - https: { - description: """ - Proxy endpoint to use when proxying HTTPS traffic. + log_schema: { + type: object: options: { + host_key: { + type: string: default: ".host" + description: """ + The name of the event field to treat as the host which sent the message. - Must be a valid URI string. 
- """ - required: false - type: string: examples: ["http://foo.bar:3128"] - } - no_proxy: { - description: """ - A list of hosts to avoid proxying. + This field will generally represent a real host, or container, that generated the message, + but is somewhat source-dependent. + """ + required: false + } + message_key: { + type: string: default: ".message" + description: """ + The name of the event field to treat as the event message. - Multiple patterns are allowed: + This would be the field that holds the raw message, such as a raw log line. + """ + required: false + } + metadata_key: { + type: string: default: ".metadata" + description: """ + The name of the event field to set the event metadata in. - | Pattern | Example match | - | ------------------- | --------------------------------------------------------------------------- | - | Domain names | `example.com` matches requests to `example.com` | - | Wildcard domains | `.example.com` matches requests to `example.com` and its subdomains | - | IP addresses | `127.0.0.1` matches requests to `127.0.0.1` | - | [CIDR][cidr] blocks | `192.168.0.0/16` matches requests to any IP addresses in this range | - | Splat | `*` matches all hosts | + Generally, this field will be set by Vector to hold event-specific metadata, such as + annotations by the `remap` transform when an error or abort is encountered. + """ + required: false + } + source_type_key: { + type: string: default: ".source_type" + description: """ + The name of the event field to set the source identifier in. - [cidr]: https://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing - """ - required: false - type: array: { - default: [] - items: type: string: examples: ["localhost", ".foo.bar", "*"] + This field will be set by the Vector source that the event was created in. + """ + required: false + } + timestamp_key: { + type: string: default: ".timestamp" + description: "The name of the event field to treat as the event timestamp." 
+ required: false } } + description: """ + Default log schema for all events. + + This is used if a component does not have its own specific log schema. All events use a log + schema, whether or not the default is used, to assign event fields on incoming events. + """ + common: false + required: false + group: "schema" } - } - telemetry: { - common: false - description: """ - Telemetry options. - - Determines whether `source` and `service` tags should be emitted with the - `component_sent_*` and `component_received_*` events. - """ - required: false - type: object: options: tags: { - description: "Configures whether to emit certain tags" - required: false + metrics_storage_refresh_period: { + type: float: {} + description: """ + The interval, in seconds, at which the internal metrics cache for VRL is refreshed. + This must be set to be able to access metrics in VRL functions. + + Higher values lead to stale metric values from `get_vector_metric`, + `find_vector_metrics`, and `aggregate_vector_metrics` functions. + """ + group: "global_options" + } + proxy: { type: object: options: { - emit_service: { + enabled: { + type: bool: default: true + description: "Enables proxying support." + required: false + } + http: { + type: string: examples: ["http://foo.bar:3128"] description: """ - True if the `service` tag should be emitted - in the `component_received_*` and `component_sent_*` - telemetry. + Proxy endpoint to use when proxying HTTP traffic. + + Must be a valid URI string. """ required: false - type: bool: default: false } - emit_source: { + https: { + type: string: examples: ["http://foo.bar:3128"] description: """ - True if the `source` tag should be emitted - in the `component_received_*` and `component_sent_*` - telemetry. + Proxy endpoint to use when proxying HTTPS traffic. + + Must be a valid URI string. 
+ """ + required: false + } + no_proxy: { + type: array: { + items: type: string: examples: ["localhost", ".foo.bar", "*"] + default: [] + } + description: """ + A list of hosts to avoid proxying. + + Multiple patterns are allowed: + + | Pattern | Example match | + | ------------------- | --------------------------------------------------------------------------- | + | Domain names | `example.com` matches requests to `example.com` | + | Wildcard domains | `.example.com` matches requests to `example.com` and its subdomains | + | IP addresses | `127.0.0.1` matches requests to `127.0.0.1` | + | [CIDR][cidr] blocks | `192.168.0.0/16` matches requests to any IP addresses in this range | + | Splat | `*` matches all hosts | + + [cidr]: https://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing """ required: false - type: bool: default: false } } + description: """ + Proxy configuration. + + Configure to proxy traffic through an HTTP(S) proxy when making external requests. + + Similar to common proxy configuration convention, you can set different proxies + to use based on the type of traffic being proxied. You can also set specific hosts that + should not be proxied. + """ + common: false + required: false + group: "global_options" } - } - timezone: { - common: false - description: """ - The name of the time zone to apply to timestamp conversions that do not contain an explicit time zone. + telemetry: { + type: object: options: tags: { + type: object: options: { + emit_service: { + type: bool: default: false + description: """ + True if the `service` tag should be emitted + in the `component_received_*` and `component_sent_*` + telemetry. + """ + required: false + } + emit_source: { + type: bool: default: false + description: """ + True if the `source` tag should be emitted + in the `component_received_*` and `component_sent_*` + telemetry. 
+ """ + required: false + } + } + description: "Configures whether to emit certain tags" + required: false + } + description: """ + Telemetry options. + + Determines whether `source` and `service` tags should be emitted with the + `component_sent_*` and `component_received_*` events. + """ + common: false + required: false + group: "global_options" + } + timezone: { + type: string: examples: ["local", "America/New_York", "EST5EDT"] + description: """ + The name of the time zone to apply to timestamp conversions that do not contain an explicit time zone. - The time zone name may be any name in the [TZ database][tzdb] or `local` to indicate system - local time. + The time zone name may be any name in the [TZ database][tzdb] or `local` to indicate system + local time. - Note that in Vector/VRL all timestamps are represented in UTC. + Note that in Vector/VRL all timestamps are represented in UTC. + + [tzdb]: https://en.wikipedia.org/wiki/List_of_tz_database_time_zones + """ + common: false + group: "global_options" + } + wildcard_matching: { + type: string: enum: { + strict: "Strict matching (must match at least one existing input)" + relaxed: "Relaxed matching (must match 0 or more inputs)" + } + description: """ + Set wildcard matching mode for inputs - [tzdb]: https://en.wikipedia.org/wiki/List_of_tz_database_time_zones - """ - required: false - type: string: examples: ["local", "America/New_York", "EST5EDT"] + Setting this to "relaxed" allows configurations with wildcards that do not match any inputs + to be accepted without causing an error. + """ + common: false + required: false + group: "global_options" + } } - wildcard_matching: { - common: false - description: """ - Set wildcard matching mode for inputs - - Setting this to "relaxed" allows configurations with wildcards that do not match any inputs - to be accepted without causing an error. 
- """ - required: false - type: string: enum: { - relaxed: "Relaxed matching (must match 0 or more inputs)" - strict: "Strict matching (must match at least one existing input)" + groups: { + global_options: { + title: "Global Options" + description: "Global configuration options that apply to Vector as a whole." + order: 1 + } + pipeline_components: { + title: "Pipeline Components" + description: "Configure sources, transforms, sinks, and enrichment tables for your observability pipeline." + order: 2 + } + api: { + title: "API" + description: "Configure Vector's observability API." + order: 3 + } + schema: { + title: "Schema" + description: "Configure Vector's internal schema system for type tracking and validation." + order: 4 + } + secrets: { + title: "Secrets" + description: "Configure secrets management for secure configuration." + order: 5 } } } diff --git a/website/cue/reference/releases/0.23.0.cue b/website/cue/reference/releases/0.23.0.cue index b0419016dc709..c70c555519bc5 100644 --- a/website/cue/reference/releases/0.23.0.cue +++ b/website/cue/reference/releases/0.23.0.cue @@ -151,7 +151,7 @@ releases: "0.23.0": { description: """ The `azure_blob` sink now supports loading credentials from environment variables and via the managed identity service. To use this, set the new [`storage_account` - parameter](/docs/reference/configuration/sinks/azure_blob/#storage_account). + parameter](/docs/reference/configuration/sinks/azure_blob/). """ pr_numbers: [12821, 12959] contributors: ["yvespp"] diff --git a/website/cue/reference/releases/0.24.0.cue b/website/cue/reference/releases/0.24.0.cue index d61e1604d8b0f..9400547429cdc 100644 --- a/website/cue/reference/releases/0.24.0.cue +++ b/website/cue/reference/releases/0.24.0.cue @@ -155,7 +155,7 @@ releases: "0.24.0": { scopes: ["enrichment tables"] description: """ A new enrichment table type was added, - [`geoip`](/docs/reference/configuration/global-options/#enrichment_tables.type). 
+ [`geoip`](/docs/reference/configuration/pipeline-components/#enrichment_tables..type). This can be used with [VRL's enrichment table functions](/docs/reference/vrl/functions/#enrichment-functions) to enrich events using a [GeoIP database](https://www.maxmind.com/en/geoip2-databases). diff --git a/website/cue/reference/releases/0.54.0.cue b/website/cue/reference/releases/0.54.0.cue new file mode 100644 index 0000000000000..58ab7f56afde4 --- /dev/null +++ b/website/cue/reference/releases/0.54.0.cue @@ -0,0 +1,457 @@ +package metadata + +releases: "0.54.0": { + date: "2026-03-10" + codename: "" + + whats_next: [] + + description: """ + The Vector team is excited to announce version `0.54.0`! + + ## Release highlights + - Enhanced `vector top` with new keybinds for scrolling, sorting, and filtering. Press `?` to + see all available keybinds. + - The `datadog_logs` sink now defaults to `zstd` compression instead of no compression, resulting + in better network efficiency and higher throughput. + - Added `component_latency_seconds` histogram and `component_latency_mean_seconds` gauge internal + metrics, exposing the time an event spends in a component. + - Syslog encoding transform received major upgrades with improved RFC compliance, support for + scalars/nested objects/arrays in structured data, and better UTF-8 safety. + - Added a new `azure_logs_ingestion` sink that supports the Azure Monitor Logs Ingestion API. + The existing `azure_monitor_logs` sink is now deprecated, and users should migrate before + Microsoft ends support for the old Data Collector API (currently scheduled for September 2026). + + ## Breaking Changes + + - The `datadog_logs` sink now defaults to `zstd` compression. You can explicitly set `compression` to preserve + previous behavior. + """ + + changelog: [ + { + type: "fix" + description: """ + Fixed a hard-to-trigger race between closing a memory buffer and outstanding + sends that could rarely cause a lost event array at shutdown. 
+ """ + contributors: ["bruceg"] + }, + { + type: "feat" + description: """ + Add support for the Azure Monitor Logs Ingestion API through a new `azure_logs_ingestion` sink. + + The `azure_monitor_logs` sink is now deprecated, and current users will need to migrate to `azure_logs_ingestion` before Microsoft end support for the old Data Collector API (currently scheduled for September 2026). + """ + contributors: ["jlaundry"] + }, + { + type: "fix" + description: """ + Remove the `tokio-util` patch override and preserve recoverable decoding behavior via `DecoderFramedRead`. + """ + contributors: ["Trighap52"] + }, + { + type: "enhancement" + description: """ + The `clickhouse` sink now supports complex data types (Array, Map, and Tuple) when using the `arrow_stream` format. + """ + contributors: ["benjamin-awd"] + }, + { + type: "feat" + description: """ + Added new keybinds to `vector top` for scrolling, sorting and filtering. You can now press `?` when using `vector top` to see all available keybinds. + """ + contributors: ["esensar", "Quad9DNS"] + }, + { + type: "fix" + description: """ + The `log_to_metric` transform now correctly handles aggregated histogram and aggregated summary metrics. + """ + contributors: ["jblazquez"] + }, + { + type: "enhancement" + description: """ + The `prometheus_remote_write` sink now supports the `healthcheck.uri` field to customize the healthcheck endpoint. + """ + contributors: ["simonhammes"] + }, + { + type: "fix" + description: """ + Fixed recording of buffer utilization metrics to properly record on both send + and receive in order to reflect the actual level and not just the "full" level. + """ + contributors: ["bruceg"] + }, + { + type: "fix" + description: """ + The ClickHouse sink's ArrowStream format now correctly handles MATERIALIZED, ALIAS, EPHEMERAL, and DEFAULT columns. MATERIALIZED, ALIAS, and EPHEMERAL columns are excluded from the fetched schema since they cannot receive INSERT data. 
DEFAULT columns are kept but marked nullable so events are not rejected when the server-computed value is omitted. + """ + contributors: ["benjamin-awd"] + }, + { + type: "fix" + description: """ + Fixed an issue where directory secret backends failed to resolve secrets organized in subdirectories + (e.g., Kubernetes mounted secrets at paths like: `/secrets/my-secrets/username`) + """ + contributors: ["pront", "vparfonov"] + }, + { + type: "fix" + description: """ + Fixed `vector test` printing literal `\\x1b` escape codes instead of rendering ANSI colors when reporting VRL compilation errors. + """ + contributors: ["thomasqueirozb"] + }, + { + type: "feat" + description: """ + Added inode metrics to the `host_metrics` source filesystem collector on unix systems. The `filesystem_inodes_total`, `filesystem_inodes_free`, `filesystem_inodes_used`, and `filesystem_inodes_used_ratio` metrics are now available. + """ + contributors: ["mushrowan"] + }, + { + type: "enhancement" + description: """ + Upgrades the syslog encoding transform with three major improvements: + + Structured Data Enhancements (RFC 5424): + + - Supports scalars + - Handles nested objects (flattened with dot notation) + - Serializes arrays as JSON strings, e.g., `tags="[\"tag1\",\"tag2\",\"tag3\"]"` (RFC 5424 spec doesn't define how to handle arrays in structured data) + - Validates SD-ID and PARAM-NAME fields per RFC 5424 + - Sanitizes invalid characters to underscores + + UTF-8 Safety Fix: + + - Fixes panics from byte-based truncation on multibyte characters + - Implements character-based truncation for all fields + - Prevents crashes with emojis, Cyrillic text, etc. 
+ + RFC 3164 Compliance Improvements: + + - Bug fix: Structured data is now properly ignored (previously incorrectly prepended) + - TAG field sanitized to ASCII printable characters (33-126) + - Adds debug logging when structured data is ignored + """ + contributors: ["vparfonov"] + }, + { + type: "enhancement" + description: """ + The `arrow_stream` codec now uses `arrow-json` instead of `serde_arrow` for Arrow encoding. + """ + contributors: ["benjamin-awd"] + }, + { + type: "feat" + description: """ + The `azure_blob` sink now supports routing requests through HTTP/HTTPS proxies, enabling uploads from restricted networks that require an outbound proxy. + """ + contributors: ["joshuacoughlan"] + }, + { + type: "enhancement" + description: """ + Added the `component_latency_seconds` histogram and + `component_latency_mean_seconds` gauge internal metrics, exposing the time an + event spends in a single transform including the transform buffer. + """ + contributors: ["bruceg"] + }, + { + type: "enhancement" + description: """ + The `datadog_logs` sink now defaults to `zstd` compression instead of no compression. This results in + better network efficiency and higher throughput. You can explicitly set `compression = "none"` to + restore the previous behavior of no compression, or set `compression = "gzip"` if you were previously + using gzip compression explicitly. + """ + contributors: ["jszwedko", "pront"] + }, + { + type: "enhancement" + description: """ + Add `content_encoding` and `cache_control` options to the `gcp_cloud_storage` sink. `content_encoding` overrides the `Content-Encoding` header (defaults to the compression scheme's content encoding). `cache_control` sets the `Cache-Control` header for created objects. + """ + contributors: ["benjamin-awd"] + }, + { + type: "fix" + description: """ + The `opentelemetry` source now correctly uses `Definition::any()` for logs output schema when `use_otlp_decoding` is enabled. 
+ Users can now enable schema validation for this source. + """ + contributors: ["pront"] + }, + { + type: "enhancement" + description: """ + Small optimization to the `websocket` source performance by avoiding getting a new time for every event in an array. + """ + contributors: ["bruceg"] + }, + { + type: "enhancement" + description: """ + The `prometheus_remote_write` sink now supports custom HTTP headers via the `request.headers` configuration option. This allows users to add custom headers to outgoing requests, which is useful for authentication, routing, or other integration requirements with Prometheus-compatible backends. + """ + contributors: ["elohmeier"] + }, + { + type: "chore" + description: """ + Removed the misleadingly-named `default-no-vrl-cli` feature flag, which did not control VRL CLI compilation. + This flag was equivalent to `default` without `api-client` and `enrichment-tables`. + Use `default-no-api-client` as a replacement (note: this includes `enrichment-tables`) or define custom features as needed. + """ + contributors: ["thomasqueirozb"] + }, + { + type: "enhancement" + description: """ + Added `internal_metrics` configuration section to the `tag_cardinality_limit` transform to better organize internal metrics configuration. The `internal_metrics.include_extended_tags` option controls whether to include extended tags (`metric_name`, `tag_key`) in the `tag_value_limit_exceeded_total` metric to help identify which specific metrics and tag keys are hitting the configured value limit. This option defaults to `false` because these tags have potentially unbounded cardinality. + """ + contributors: ["kaarolch"] + }, + { + type: "chore" + description: """ + The `*buffer_utilization_mean` metrics have been enhanced to use time-weighted + averaging which makes them more representative of the actual buffer utilization + over time. 
+ + This change is breaking due to the replacement of the existing + `buffer_utilization_ewma_alpha` config option with + `buffer_utilization_ewma_half_life_seconds`. + """ + contributors: ["bruceg"] + }, + ] + + vrl_changelog: """ + ### [0.31.0 (2026-03-05)] + + #### New Features + + - Added a new `parse_yaml` function. This function parses yaml according to the [YAML 1.1 spec](https://yaml.org/spec/1.1/). + + authors: juchem (https://github.com/vectordotdev/vrl/pull/1602) + - Added `--quiet` / `-q` flag to the CLI to suppress the banner text when starting the REPL. + + authors: thomasqueirozb (https://github.com/vectordotdev/vrl/pull/1617) + + #### Fixes + + - Fixed a bug where lexer parse errors would emit a generic span with 202 error code instead of the + proper error. Also fixed error positions from nested lexers (e.g., string literals inside function + arguments) to correctly point to the actual location in the source. + + Before (generic E202 syntax error): + + ```text + $ string("\a") + + error[E202]: syntax error + ┌─ :1:1 + │ + 1 │ string("\a") + │ ^^^^^^^^^^^^ unexpected error: invalid escape character: \a + │ + = see language documentation at https://vrl.dev + = try your code in the VRL REPL, learn more at https://vrl.dev/examples + ``` + + After (correct E209 invalid escape character): + + ```text + $ string("\a") + + error[E209]: invalid escape character: \a + ┌─ :1:10 + │ + 1 │ string("\a") + │ ^ invalid escape character: a + │ + = see language documentation at https://vrl.dev + = try your code in the VRL REPL, learn more at https://vrl.dev/examples + ``` + + authors: thomasqueirozb (https://github.com/vectordotdev/vrl/pull/1579) + - Fixed a bug where `parse_duration` panicked when large values overflowed during multiplication. + The function now returns an error instead. + + authors: thomasqueirozb (https://github.com/vectordotdev/vrl/pull/1618) + - Corrected the type definition of the `basename` function to indicate that it can also return `null`. 
+ Previously the type definition indicated that the function could only return bytes (or strings). + + authors: thomasqueirozb (https://github.com/vectordotdev/vrl/pull/1635) + - Fixed incorrect parameter types in several stdlib functions: + + - `md5`: `value` parameter was typed as `any`, now correctly typed as `bytes`. + - `seahash`: `value` parameter was typed as `any`, now correctly typed as `bytes`. + - `floor`: `value` parameter was typed as `any`, now correctly typed as `float | integer`; `precision` parameter was typed as `any`, now correctly typed as `integer`. + - `parse_key_value`: `key_value_delimiter` and `field_delimiter` parameters were typed as `any`, now correctly typed as `bytes`. + + Note: the function documentation already reflected the correct types. + + authors: thomasqueirozb (https://github.com/vectordotdev/vrl/pull/1650) + + ### [0.30.0 (2026-01-22)] + """ + + commits: [ + {sha: "c5f899575441a15598a76dd10c785074de93a0f7", date: "2026-01-28 19:32:30 UTC", description: "v0.53.0 release", pr_number: 24560, scopes: ["releasing"], type: "chore", breaking_change: false, author: "Thomas", files_count: 45, insertions_count: 311, deletions_count: 99}, + {sha: "7594b8adfd35cb43102ca0bde6aaf18fce14f5b1", date: "2026-01-28 22:20:15 UTC", description: "Add custom instrumentation hook", pr_number: 24558, scopes: ["buffers"], type: "chore", breaking_change: false, author: "Bruce Guenter", files_count: 2, insertions_count: 32, deletions_count: 9}, + {sha: "0af6553bc542c1191aa94e48c14af4c50d4d588c", date: "2026-01-29 05:03:24 UTC", description: "match cla link to gh workflow", pr_number: 24565, scopes: ["internal"], type: "docs", breaking_change: false, author: "eldondevat", files_count: 1, insertions_count: 1, deletions_count: 1}, + {sha: "22e92a603ea2ddcd660752a1a774d459e6b74607", date: "2026-01-28 23:19:40 UTC", description: "Refactor EWMA + Gauge into a new struct", pr_number: 24556, scopes: ["observability"], type: "chore", breaking_change: false, 
author: "Bruce Guenter", files_count: 4, insertions_count: 45, deletions_count: 16}, + {sha: "2da8b249974a82b9cfc9e1cd65ab0ca833dc1be5", date: "2026-01-28 21:32:08 UTC", description: "Use correct keys for histogram/summary", pr_number: 24394, scopes: ["log_to_metric transform"], type: "fix", breaking_change: false, author: "Javier Blazquez", files_count: 3, insertions_count: 34, deletions_count: 31}, + {sha: "5f1efed7b0eabfb3a4c189acc377ce235ad69d01", date: "2026-01-30 20:35:50 UTC", description: "Update crates and migrate to the new SDK", pr_number: 24255, scopes: ["azure_blob sink"], type: "feat", breaking_change: false, author: "Josh Coughlan", files_count: 14, insertions_count: 1046, deletions_count: 412}, + {sha: "9ba83a1c7e575793bba2d0b25b0787e6deddda54", date: "2026-01-30 22:20:55 UTC", description: "Two tiny optimizations", pr_number: 24520, scopes: ["sample transform"], type: "enhancement", breaking_change: false, author: "Bruce Guenter", files_count: 1, insertions_count: 31, deletions_count: 44}, + {sha: "25c281903acf9db0fb7ae34c7f79f53f6535f2f7", date: "2026-01-30 22:21:57 UTC", description: "Micro-optimize send loop", pr_number: 24555, scopes: ["websocket source"], type: "enhancement", breaking_change: false, author: "Bruce Guenter", files_count: 2, insertions_count: 8, deletions_count: 4}, + {sha: "94221283e89043533f4711175c75fa423fed076e", date: "2026-01-31 01:33:33 UTC", description: "minor release template improvements", pr_number: 24575, scopes: ["releasing"], type: "chore", breaking_change: false, author: "Thomas", files_count: 1, insertions_count: 2, deletions_count: 1}, + {sha: "88638eebf8fbc467c4e158237a9ebd86e81a0ec7", date: "2026-02-03 01:44:52 UTC", description: "bump actions/cache from 5.0.1 to 5.0.3", pr_number: 24579, scopes: ["ci"], type: "chore", breaking_change: false, author: "dependabot[bot]", files_count: 3, insertions_count: 6, deletions_count: 6}, + {sha: "e512d47c6dc80269696bacf9b3b619bca57b20ab", date: "2026-02-02 20:57:55 UTC", 
description: "bump actions/checkout from 6.0.1 to 6.0.2", pr_number: 24581, scopes: ["ci"], type: "chore", breaking_change: false, author: "dependabot[bot]", files_count: 26, insertions_count: 73, deletions_count: 73}, + {sha: "bae894dcd8912e0cb0774b602abcea2dc4213f12", date: "2026-02-02 18:20:36 UTC", description: "allow environment interpolation from http provider config", pr_number: 24341, scopes: ["http provider"], type: "enhancement", breaking_change: false, author: "John Sonnenschein", files_count: 1, insertions_count: 29, deletions_count: 3}, + {sha: "aba5fb479de401da11c512e734c7f23e295d9766", date: "2026-02-02 21:33:28 UTC", description: "bump github/codeql-action from 4.31.9 to 4.32.0", pr_number: 24580, scopes: ["ci"], type: "chore", breaking_change: false, author: "dependabot[bot]", files_count: 1, insertions_count: 1, deletions_count: 1}, + {sha: "1040f7844210c105233b792624b9949a63b76ee2", date: "2026-02-02 21:37:40 UTC", description: "bump docker/login-action from 3.6.0 to 3.7.0", pr_number: 24582, scopes: ["ci"], type: "chore", breaking_change: false, author: "dependabot[bot]", files_count: 4, insertions_count: 6, deletions_count: 6}, + {sha: "4a1eda5bcd3d68cbc73d89dd0e0d9ef0d94cfd54", date: "2026-02-03 11:38:28 UTC", description: "add support for Arrow complex types", pr_number: 24409, scopes: ["clickhouse sink"], type: "enhancement", breaking_change: false, author: "Benjamin Dornel", files_count: 11, insertions_count: 1783, deletions_count: 1846}, + {sha: "f77ab8ac21766d11ab6545c0759d11e649533ab5", date: "2026-02-02 22:59:19 UTC", description: "bump clap from 4.5.53 to 4.5.56 in the clap group across 1 directory", pr_number: 24500, scopes: ["deps"], type: "chore", breaking_change: false, author: "dependabot[bot]", files_count: 2, insertions_count: 8, deletions_count: 8}, + {sha: "0ddd610f28cd9c563c29dff69a328302ff205fc4", date: "2026-02-03 04:06:44 UTC", description: "bump the tokio group with 5 updates", pr_number: 24485, scopes: ["deps"], type: 
"chore", breaking_change: false, author: "dependabot[bot]", files_count: 9, insertions_count: 49, deletions_count: 112}, + {sha: "cb701eb23bdad0fac0f476687fd4e7cf1539d160", date: "2026-02-03 19:12:12 UTC", description: "Remove orphaned audit.yml", pr_number: 24584, scopes: ["ci"], type: "chore", breaking_change: false, author: "Thomas", files_count: 1, insertions_count: 0, deletions_count: 18}, + {sha: "d97292c0c5c3d91213bca60920ae843409ffec1a", date: "2026-02-04 01:41:14 UTC", description: "bump bytes from 1.10.1 to 1.11.1", pr_number: 24587, scopes: ["deps"], type: "chore", breaking_change: false, author: "dependabot[bot]", files_count: 4, insertions_count: 100, deletions_count: 100}, + {sha: "383c2ffa7ab7af97ffba2df438cbf24b3e0a4d88", date: "2026-02-05 03:15:48 UTC", description: "bump git2 from 0.20.2 to 0.20.4", pr_number: 24598, scopes: ["deps"], type: "chore", breaking_change: false, author: "dependabot[bot]", files_count: 2, insertions_count: 5, deletions_count: 5}, + {sha: "382437064afeb1a98121d927028852bb89204ea6", date: "2026-02-06 21:00:30 UTC", description: "Refactor output types into sub-module", pr_number: 24604, scopes: ["transforms"], type: "chore", breaking_change: false, author: "Bruce Guenter", files_count: 2, insertions_count: 364, deletions_count: 359}, + {sha: "9338ee0ba39491a1a676e162109ad75decac825f", date: "2026-02-06 22:11:36 UTC", description: "Bump vrl and add description to parameters", pr_number: 24597, scopes: ["vrl"], type: "chore", breaking_change: false, author: "Thomas", files_count: 11, insertions_count: 26, deletions_count: 1}, + {sha: "20360ac25e7a79730dd7ab39f5ca09ef48bfe52b", date: "2026-02-07 00:10:04 UTC", description: "bump time from 0.3.44 to 0.3.47", pr_number: 24608, scopes: ["deps"], type: "chore", breaking_change: false, author: "dependabot[bot]", files_count: 1, insertions_count: 9, deletions_count: 9}, + {sha: "e6df5ba13b847c00b90f0d6e03066c1946f97b4d", date: "2026-02-07 00:17:37 UTC", description: "Refactor 
transform builders into methods", pr_number: 24605, scopes: ["topology"], type: "chore", breaking_change: false, author: "Bruce Guenter", files_count: 1, insertions_count: 149, deletions_count: 149}, + {sha: "2a8183a52393f46f51461b918175f893555fc273", date: "2026-02-07 06:34:41 UTC", description: "add defaults to Parameters and internal_failure_reasons to functions", pr_number: 24613, scopes: ["vrl"], type: "chore", breaking_change: false, author: "Thomas", files_count: 12, insertions_count: 226, deletions_count: 154}, + {sha: "7cd3395e3049e1ce53d43e4aaf41415355c77bc4", date: "2026-02-10 05:11:50 UTC", description: "add scrolling, sorting and filtering to `vector top`", pr_number: 24355, scopes: ["cli"], type: "feat", breaking_change: false, author: "Ensar Sarajčić", files_count: 11, insertions_count: 926, deletions_count: 29}, + {sha: "1404ec385fa456742a1785d764f675762510057f", date: "2026-02-10 20:31:56 UTC", description: "bump axios from 1.13.2 to 1.13.5 in /website", pr_number: 24622, scopes: ["website deps"], type: "chore", breaking_change: false, author: "dependabot[bot]", files_count: 1, insertions_count: 13, deletions_count: 13}, + {sha: "5e8715bfe91d16cdafec4c4736863b39ec164617", date: "2026-02-10 23:32:54 UTC", description: "bump diff from 4.0.2 to 4.0.4 in /website", pr_number: 24519, scopes: ["website deps"], type: "chore", breaking_change: false, author: "dependabot[bot]", files_count: 1, insertions_count: 3, deletions_count: 3}, + {sha: "b523c6d677561fbfde8044680df33f706f844bc6", date: "2026-02-11 01:42:57 UTC", description: "Add transform latency metrics", pr_number: 24627, scopes: ["observability"], type: "enhancement", breaking_change: false, author: "Bruce Guenter", files_count: 24, insertions_count: 751, deletions_count: 334}, + {sha: "726c383d704fb0e9018ccdc91b9d08831d499f21", date: "2026-02-11 07:59:20 UTC", description: "collect inode metrics", pr_number: 24625, scopes: ["host_metrics source"], type: "feat", breaking_change: false, author: 
"rowan", files_count: 3, insertions_count: 59, deletions_count: 7}, + {sha: "d360a8e8c1ee239e1428479cc0203da839eca230", date: "2026-02-11 09:11:18 UTC", description: "add support for `healthcheck.uri`", pr_number: 24603, scopes: ["prometheus_remote_write sink"], type: "enhancement", breaking_change: false, author: "Simon", files_count: 3, insertions_count: 13, deletions_count: 2}, + {sha: "8517809dad1f4ffdf9808fcf6992a70935951ffb", date: "2026-02-12 02:15:07 UTC", description: "[web-8160] upgrade typesense-sync to be v30 compatible", pr_number: 24640, scopes: ["website"], type: "chore", breaking_change: false, author: "Reda El Issati", files_count: 5, insertions_count: 259, deletions_count: 144}, + {sha: "5e02189cc4be072b8261fc9c3d1152f273cb5345", date: "2026-02-12 03:15:46 UTC", description: "Add missing newline to typesense-sync.ts", pr_number: 24642, scopes: ["website"], type: "chore", breaking_change: false, author: "Thomas", files_count: 1, insertions_count: 2, deletions_count: 1}, + {sha: "5c8a811807ed5412fd45b9b01a58290591796ce6", date: "2026-02-12 06:40:54 UTC", description: "Bump vrl and add return_kind to functions", pr_number: 24614, scopes: ["vrl"], type: "chore", breaking_change: false, author: "Thomas", files_count: 11, insertions_count: 41, deletions_count: 1}, + {sha: "d10f3a5b892833232002c97f24bf008dbef1eeec", date: "2026-02-13 10:37:50 UTC", description: "Bump VRL and implement category for functions", pr_number: 24653, scopes: ["vrl"], type: "chore", breaking_change: false, author: "Thomas", files_count: 17, insertions_count: 93, deletions_count: 1}, + {sha: "3a73af45d530d5a7b95a34e1cfdb1d6213dd0274", date: "2026-02-14 03:50:40 UTC", description: "Add proxy support", pr_number: 24256, scopes: ["azure_blob sink"], type: "feat", breaking_change: false, author: "Josh Coughlan", files_count: 6, insertions_count: 41, deletions_count: 11}, + {sha: "b44dfee88b59b40d6602d68e3e18541d01954d46", date: "2026-02-18 02:14:02 UTC", description: "bump the 
patches group across 1 directory with 34 updates", pr_number: 24645, scopes: ["deps"], type: "chore", breaking_change: false, author: "dependabot[bot]", files_count: 14, insertions_count: 451, deletions_count: 483}, + {sha: "692704adc1948e9a90e0dc51b52b557fa4e79619", date: "2026-02-18 02:14:04 UTC", description: "bump the aws group across 1 directory with 7 updates", pr_number: 24588, scopes: ["deps"], type: "chore", breaking_change: false, author: "dependabot[bot]", files_count: 2, insertions_count: 22, deletions_count: 34}, + {sha: "cc664d665ad55c223be788fc5281d28d7c14374a", date: "2026-02-17 22:26:59 UTC", description: "remove default-no-vrl-cli", pr_number: 24672, scopes: ["feature flags"], type: "chore", breaking_change: true, author: "Thomas", files_count: 3, insertions_count: 6, deletions_count: 1}, + {sha: "a51820e73030c65ede9e3afef252386746d5445d", date: "2026-02-18 00:19:16 UTC", description: "add integration tests for the top command", pr_number: 24649, scopes: ["dev"], type: "chore", breaking_change: false, author: "Pavlos Rontidis", files_count: 5, insertions_count: 663, deletions_count: 1}, + {sha: "76c78377ea3006c2dc29a740fdee6206b383566a", date: "2026-02-18 01:56:39 UTC", description: "bump the tracing group across 1 directory with 4 updates", pr_number: 24671, scopes: ["deps"], type: "chore", breaking_change: false, author: "dependabot[bot]", files_count: 4, insertions_count: 106, deletions_count: 106}, + {sha: "9aeae23363d1766542cb28ea3315b305cd84599d", date: "2026-02-18 19:26:19 UTC", description: "update cargo-deny to support CVSS version 4", pr_number: 24678, scopes: ["ci"], type: "fix", breaking_change: false, author: "Pavlos Rontidis", files_count: 2, insertions_count: 2, deletions_count: 2}, + {sha: "31ded018c26436b74b7526cb0509eecb0e57fd5a", date: "2026-02-19 03:31:27 UTC", description: "hardcode DOCKER_API_VERSION=1.44 in amazon-ecs-local-container-endpoints", pr_number: 24684, scopes: ["dev"], type: "fix", breaking_change: false, author: 
"Thomas", files_count: 1, insertions_count: 2, deletions_count: 0}, + {sha: "26ae601da3e871711ec2922e7c0879155f1ec616", date: "2026-02-19 04:58:14 UTC", description: "update keccak to fix cargo-deny check", pr_number: 24679, scopes: ["ci"], type: "fix", breaking_change: false, author: "Pavlos Rontidis", files_count: 1, insertions_count: 2, deletions_count: 2}, + {sha: "0b25c7698393f42e09bf79858782cebb027b36e2", date: "2026-02-19 04:37:36 UTC", description: "do not skip the IT suite when ran manually", pr_number: 24683, scopes: ["ci"], type: "chore", breaking_change: false, author: "Pavlos Rontidis", files_count: 1, insertions_count: 11, deletions_count: 7}, + {sha: "6724b0a8f2af5403bdbe30d315895e3c434c5ed5", date: "2026-02-19 05:36:01 UTC", description: "Update num-bigint-dig 0.8.4 -> 0.8.6 to resolve future incompatibilities", pr_number: 24664, scopes: ["deps"], type: "chore", breaking_change: false, author: "zapdos26", files_count: 1, insertions_count: 2, deletions_count: 3}, + {sha: "2a220496a7b5af8ff94e25b0aaa1753a39182ec1", date: "2026-02-19 05:38:18 UTC", description: "bump VRL and use Parameter builder", pr_number: 24681, scopes: ["vrl"], type: "chore", breaking_change: false, author: "Thomas", files_count: 11, insertions_count: 131, deletions_count: 180}, + {sha: "395c85f5ce2a99b325d7d4d07ef1c39b2a7ec1fa", date: "2026-02-19 19:17:44 UTC", description: "bump the csv group with 2 updates", pr_number: 24431, scopes: ["deps"], type: "chore", breaking_change: false, author: "dependabot[bot]", files_count: 2, insertions_count: 6, deletions_count: 6}, + {sha: "a6e37ca053dc8501113c37d1dad2b74555c6ef95", date: "2026-02-19 20:38:15 UTC", description: "Record buffer utilization on receive", pr_number: 24650, scopes: ["observability"], type: "fix", breaking_change: false, author: "Bruce Guenter", files_count: 3, insertions_count: 102, deletions_count: 33}, + {sha: "90e3342359e1a59e61f2c0ad1de2cc6bf9c07fd9", date: "2026-02-19 23:00:20 UTC", description: "reference issue 
#24687 above DOCKER_API_VERSION hack", pr_number: 24696, scopes: ["dev"], type: "chore", breaking_change: false, author: "Thomas", files_count: 1, insertions_count: 1, deletions_count: 0}, + {sha: "e487d6ed6fa413f2ced27780227424f02472c799", date: "2026-02-19 23:36:15 UTC", description: "Fix K8s E2E test failures", pr_number: 24694, scopes: ["ci"], type: "fix", breaking_change: false, author: "Pavlos Rontidis", files_count: 2, insertions_count: 16, deletions_count: 4}, + {sha: "64caf5ee3b3e0e318c4f1a40dbc68cf6182b8d02", date: "2026-02-20 01:59:56 UTC", description: "Add K8s-related scripts to K8s change detection filter", pr_number: 24698, scopes: ["ci"], type: "fix", breaking_change: false, author: "Pavlos Rontidis", files_count: 1, insertions_count: 3, deletions_count: 0}, + {sha: "bd79aeca7946844a2fa29720cac34a9f62717e71", date: "2026-02-20 09:28:46 UTC", description: "add custom HTTP headers support", pr_number: 23962, scopes: ["prometheus_remote_write sink"], type: "feat", breaking_change: false, author: "elohmeier", files_count: 6, insertions_count: 172, deletions_count: 18}, + {sha: "adf1aba42b99acfb880f466d6f091a31b43d201d", date: "2026-02-21 10:34:40 UTC", description: "Initial `azure_logs_ingestion` sink", pr_number: 22912, scopes: ["azure_logs_ingestion sink"], type: "feat", breaking_change: false, author: "Jed Laundry", files_count: 19, insertions_count: 1832, deletions_count: 3}, + {sha: "cfad8051e27bd8682c27dbf4c085b7839fbc4615", date: "2026-02-20 20:41:45 UTC", description: "bump k8s and minikube versions", pr_number: 24699, scopes: ["deps"], type: "chore", breaking_change: false, author: "Pavlos Rontidis", files_count: 2, insertions_count: 7, deletions_count: 7}, + {sha: "89652601e0c512f6af52f048d9c53fdc38b826fa", date: "2026-02-20 19:45:58 UTC", description: "Fix race draining a memory buffer", pr_number: 24695, scopes: ["buffers"], type: "fix", breaking_change: false, author: "Bruce Guenter", files_count: 2, insertions_count: 78, deletions_count: 
1}, + {sha: "2cbcc1672771848cb5d9a543961c8e39b664f653", date: "2026-02-20 21:56:49 UTC", description: "default to zstd compression", pr_number: 19456, scopes: ["datadog_logs sink"], type: "enhancement", breaking_change: false, author: "Doug Smith", files_count: 8, insertions_count: 109, deletions_count: 26}, + {sha: "81e546cda71a6b903c3fc7b631518a9b338173a6", date: "2026-02-20 23:43:17 UTC", description: "update tracing in cargo lock", pr_number: 24703, scopes: ["deps"], type: "chore", breaking_change: false, author: "Pavlos Rontidis", files_count: 1, insertions_count: 1, deletions_count: 1}, + {sha: "68609dff0976bf0a60c1bbf25edc028d1432d085", date: "2026-02-21 00:11:09 UTC", description: "add dep update choice to PR template", pr_number: 24704, scopes: ["dev"], type: "chore", breaking_change: false, author: "Pavlos Rontidis", files_count: 1, insertions_count: 5, deletions_count: 3}, + {sha: "c101e3c696dafcad94bdbe612c8b08b3a8214d9c", date: "2026-02-21 00:39:58 UTC", description: "upload test results for make commands that use nextest", pr_number: 24680, scopes: ["ci"], type: "feat", breaking_change: false, author: "Pavlos Rontidis", files_count: 5, insertions_count: 19, deletions_count: 40}, + {sha: "cbbae138a299b6fd380d2b2d56d51bc1d858e264", date: "2026-02-21 01:18:43 UTC", description: "introduce AGENTS.md", pr_number: 23858, scopes: ["dev"], type: "feat", breaking_change: false, author: "Pavlos Rontidis", files_count: 3, insertions_count: 365, deletions_count: 0}, + {sha: "5ce1198a2a4efef0102f8d401670215a3ca15612", date: "2026-02-21 06:55:24 UTC", description: "remove tokio-util patch dependency", pr_number: 24658, scopes: ["deps"], type: "fix", breaking_change: false, author: "Zyad Haddad", files_count: 21, insertions_count: 251, deletions_count: 53}, + {sha: "9465cec953ec69c6c213d30c8a5737805647cb69", date: "2026-02-21 03:15:48 UTC", description: "Time-weight buffer utilization means", pr_number: 24697, scopes: ["observability"], type: "enhancement", 
breaking_change: true, author: "Bruce Guenter", files_count: 13, insertions_count: 236, deletions_count: 55}, + {sha: "359b646c853c96eb5ca7ad81c1cbd88915c431a5", date: "2026-02-21 06:05:06 UTC", description: "fix top tests features and run them on CI", pr_number: 24677, scopes: ["dev"], type: "chore", breaking_change: false, author: "Pavlos Rontidis", files_count: 9, insertions_count: 35, deletions_count: 5}, + {sha: "cb2112601d0b0a424b082a06c53fe3cd8879790e", date: "2026-02-23 19:27:36 UTC", description: "remove 'type: bug' label (now using 'type: Bug')", pr_number: 24711, scopes: ["dev"], type: "chore", breaking_change: false, author: "Pavlos Rontidis", files_count: 1, insertions_count: 0, deletions_count: 2}, + {sha: "0a37c8cc251b63dd7a199174f3c06f4c729ad4cf", date: "2026-02-23 19:57:48 UTC", description: "remove 'type: feature' label (now using 'type: Feature')", pr_number: 24713, scopes: ["dev"], type: "chore", breaking_change: false, author: "Pavlos Rontidis", files_count: 1, insertions_count: 0, deletions_count: 2}, + {sha: "541819c67b8e53e6fc5801468c2cf1bfbd456465", date: "2026-02-23 19:57:48 UTC", description: "remove last instance of BoxService", pr_number: 24707, scopes: ["splunk service"], type: "chore", breaking_change: false, author: "Pavlos Rontidis", files_count: 1, insertions_count: 10, deletions_count: 5}, + {sha: "89b2c832552a2d63720c648a52c3b503968a5e6c", date: "2026-02-23 19:59:51 UTC", description: "use cargo hack to perform single feature compilation checks", pr_number: 23961, scopes: ["ci"], type: "chore", breaking_change: false, author: "Pavlos Rontidis", files_count: 5, insertions_count: 45, deletions_count: 113}, + {sha: "717a5690344c414c47e902f57bb3a8795c8cd54d", date: "2026-02-23 20:01:45 UTC", description: "install correct deny version", pr_number: 24712, scopes: ["ci"], type: "fix", breaking_change: false, author: "Thomas", files_count: 1, insertions_count: 1, deletions_count: 1}, + {sha: "82786f550309cd39d5c488c4dad3ed5c4da4ffa0", 
date: "2026-02-23 20:51:11 UTC", description: "bump vdev version", pr_number: 24714, scopes: ["vdev"], type: "chore", breaking_change: false, author: "Pavlos Rontidis", files_count: 2, insertions_count: 2, deletions_count: 2}, + {sha: "023b6f7d26a771a516c42edd24a439b393f1fba2", date: "2026-02-23 21:50:14 UTC", description: "fix various inconsistencies", pr_number: 24715, scopes: ["vrl"], type: "docs", breaking_change: false, author: "Thomas", files_count: 5, insertions_count: 114, deletions_count: 26}, + {sha: "dae71aee6c9a6afeb199d4fbb6a40af0333f897b", date: "2026-02-23 22:47:46 UTC", description: "declare versions in one place one - DRY", pr_number: 24716, scopes: ["ci"], type: "fix", breaking_change: false, author: "Pavlos Rontidis", files_count: 2, insertions_count: 68, deletions_count: 85}, + {sha: "41324d241bfb93cb6a699e29008a44d1087111bd", date: "2026-02-23 23:02:59 UTC", description: "remove windows build jobs restriction", pr_number: 24717, scopes: ["ci"], type: "enhancement", breaking_change: false, author: "Thomas", files_count: 1, insertions_count: 0, deletions_count: 6}, + {sha: "ef05c2106c77125e898d4dd6557ae67994d87b11", date: "2026-02-24 00:22:39 UTC", description: "bump datadog-ci version", pr_number: 24718, scopes: ["ci"], type: "chore", breaking_change: false, author: "Pavlos Rontidis", files_count: 1, insertions_count: 1, deletions_count: 1}, + {sha: "724bb428767c185d949686be1f77cd98288cb412", date: "2026-02-24 18:11:26 UTC", description: "add content_encoding and cache_control options", pr_number: 24506, scopes: ["gcp_cloud_storage sink"], type: "feat", breaking_change: false, author: "Benjamin Dornel", files_count: 4, insertions_count: 180, deletions_count: 4}, + {sha: "75c1c62ea0e9fda8dbc89ef7fdaf812a09537bb9", date: "2026-02-25 01:00:22 UTC", description: "bump nix to 0.31 and remove patch dependency", pr_number: 24725, scopes: ["deps"], type: "chore", breaking_change: false, author: "Thomas", files_count: 4, insertions_count: 28, 
deletions_count: 33}, + {sha: "148f035c86697697cdbb883b738a191d2918afb6", date: "2026-02-25 01:05:38 UTC", description: "render VRL examples' input", pr_number: 24726, scopes: ["website"], type: "feat", breaking_change: false, author: "Thomas", files_count: 1, insertions_count: 11, deletions_count: 1}, + {sha: "3c2bc02c91e7923aa6c01c68659f0ac5e9a45db1", date: "2026-02-25 02:00:40 UTC", description: "added tap tests", pr_number: 24724, scopes: ["dev"], type: "chore", breaking_change: false, author: "Pavlos Rontidis", files_count: 13, insertions_count: 676, deletions_count: 193}, + {sha: "b1359dc7f17d269d8a30b825653cda3ea29678ee", date: "2026-02-25 19:04:51 UTC", description: "bundle dependabot aws-* security updates", pr_number: 24732, scopes: ["ci"], type: "chore", breaking_change: false, author: "Pavlos Rontidis", files_count: 1, insertions_count: 4, deletions_count: 0}, + {sha: "e3c52276ba8063c4edc21e20563e86ca53f83b1b", date: "2026-02-25 20:25:22 UTC", description: "set DD_API_KEY in test-make-command.yml to upload test results", pr_number: 24764, scopes: ["ci"], type: "fix", breaking_change: false, author: "Pavlos Rontidis", files_count: 1, insertions_count: 2, deletions_count: 0}, + {sha: "8cd6703f687fa8747bb2de495d5ed15658b1a4ed", date: "2026-02-26 02:57:44 UTC", description: "wrap enrichment errors in a custom type", pr_number: 24495, scopes: ["enrichment tables"], type: "chore", breaking_change: false, author: "Yoenn Burban", files_count: 10, insertions_count: 161, deletions_count: 83}, + {sha: "4738d4794f7ec6f9f038a36f5c2c420345ec2224", date: "2026-02-26 01:28:43 UTC", description: "bump colored from 3.0.0 to 3.1.1", pr_number: 24762, scopes: ["deps"], type: "chore", breaking_change: false, author: "dependabot[bot]", files_count: 2, insertions_count: 6, deletions_count: 6}, + {sha: "6462709411820963cb5e60e7f6d1fd3a419746d5", date: "2026-02-26 02:01:49 UTC", description: "bump github/codeql-action from 4.32.0 to 4.32.4", pr_number: 24735, scopes: ["ci"], 
type: "chore", breaking_change: false, author: "dependabot[bot]", files_count: 1, insertions_count: 1, deletions_count: 1}, + {sha: "64a292997b2c156c7e1353bd68be055cae83e5e8", date: "2026-02-25 21:19:08 UTC", description: "bump the tower group across 1 directory with 2 updates", pr_number: 24501, scopes: ["deps"], type: "chore", breaking_change: false, author: "dependabot[bot]", files_count: 2, insertions_count: 11, deletions_count: 38}, + {sha: "d83a7f4d9b062a0f959cd3fb94a4aef47a471ffa", date: "2026-02-26 09:22:02 UTC", description: "add support for default columns", pr_number: 24692, scopes: ["clickhouse sink"], type: "fix", breaking_change: false, author: "Benjamin Dornel", files_count: 3, insertions_count: 249, deletions_count: 51}, + {sha: "3ec39d22f937d74e38e63f7894d3bfbdcbef437c", date: "2026-02-26 03:27:14 UTC", description: "bump proptest from 1.8.0 to 1.10.0", pr_number: 24752, scopes: ["deps"], type: "chore", breaking_change: false, author: "dependabot[bot]", files_count: 5, insertions_count: 38, deletions_count: 12}, + {sha: "84599a6576883efb7ae86fdaf651ce4f05d20a18", date: "2026-02-26 03:31:06 UTC", description: "bump data-encoding from 2.9.0 to 2.10.0", pr_number: 24751, scopes: ["deps"], type: "chore", breaking_change: false, author: "dependabot[bot]", files_count: 2, insertions_count: 3, deletions_count: 3}, + {sha: "0cf89052406cd96cebbb8d05a14711870ab971e9", date: "2026-02-26 03:42:37 UTC", description: "bump toml_edit from 0.22.27 to 0.23.9", pr_number: 24759, scopes: ["deps"], type: "chore", breaking_change: false, author: "dependabot[bot]", files_count: 2, insertions_count: 15, deletions_count: 2}, + {sha: "ec40b258134aae68644beac2c0fadbd8c0deb23b", date: "2026-02-26 05:20:33 UTC", description: "bump the aws group with 2 updates", pr_number: 24738, scopes: ["deps"], type: "chore", breaking_change: false, author: "dependabot[bot]", files_count: 1, insertions_count: 4, deletions_count: 4}, + {sha: "f839e42f8e52b9ab21421cd65e62a279b8dffe86", date: 
"2026-02-26 01:27:05 UTC", description: "bump the clap group with 2 updates", pr_number: 24739, scopes: ["deps"], type: "chore", breaking_change: false, author: "dependabot[bot]", files_count: 2, insertions_count: 10, deletions_count: 10}, + {sha: "10a5ca2c4c29c0ffadc2b9e5e5d45effcb70e18f", date: "2026-02-26 01:32:45 UTC", description: "add toml to codecs dev-dependencies", pr_number: 24766, scopes: ["dev"], type: "fix", breaking_change: false, author: "Thomas", files_count: 1, insertions_count: 1, deletions_count: 0}, + {sha: "76c637fb00259106a08633d66d195aa1f0587b25", date: "2026-02-26 06:33:36 UTC", description: "bump arc-swap from 1.7.1 to 1.8.2", pr_number: 24749, scopes: ["deps"], type: "chore", breaking_change: false, author: "dependabot[bot]", files_count: 2, insertions_count: 6, deletions_count: 3}, + {sha: "61c2b5b84b04df0ac994cf66b4691f9095d89edf", date: "2026-02-26 21:50:11 UTC", description: "consolidate features", pr_number: 24637, scopes: ["feature flags"], type: "chore", breaking_change: true, author: "Thomas", files_count: 1, insertions_count: 21, deletions_count: 14}, + {sha: "a367fc0e9a7fd5371e71a943c838107fa5697427", date: "2026-02-26 22:56:19 UTC", description: "distribute MIT-0 and Unicode-3.0 licenses", pr_number: 24775, scopes: ["releasing"], type: "chore", breaking_change: false, author: "Thomas", files_count: 2, insertions_count: 55, deletions_count: 0}, + {sha: "53670b1a02b1a638956118ebfd6a231b4e24415a", date: "2026-02-27 05:05:43 UTC", description: "bump the patches group with 9 updates", pr_number: 24737, scopes: ["deps"], type: "chore", breaking_change: false, author: "dependabot[bot]", files_count: 7, insertions_count: 123, deletions_count: 124}, + {sha: "6dacc589c4c225283e53be36b0e095fed24738a0", date: "2026-02-27 06:41:45 UTC", description: "Add metric and tag name to tag_value_limit_exceeded_total metric", pr_number: 24236, scopes: ["tag_cardinality_limit transform"], type: "enhancement", breaking_change: false, author: "Karol 
Chrapek", files_count: 7, insertions_count: 113, deletions_count: 7}, + {sha: "7122b6871ed53176b5ce2bf529a376cc1a86b52d", date: "2026-02-27 06:48:57 UTC", description: "Add cue fmt during documentation generation", pr_number: 24771, scopes: ["external docs"], type: "fix", breaking_change: false, author: "Karol Chrapek", files_count: 1, insertions_count: 1, deletions_count: 0}, + {sha: "1487b5304089467126006685df4d5684be8d0229", date: "2026-02-27 13:57:30 UTC", description: "replace `serde_arrow` with `arrow-json`", pr_number: 24661, scopes: ["codecs"], type: "enhancement", breaking_change: false, author: "Benjamin Dornel", files_count: 10, insertions_count: 352, deletions_count: 385}, + {sha: "331257fea2f875b67ac89a5a94c506dbadc56389", date: "2026-02-27 02:00:21 UTC", description: "Remove CI-only formatting check from check-docs.sh", pr_number: 24777, scopes: ["ci"], type: "chore", breaking_change: false, author: "Pavlos Rontidis", files_count: 1, insertions_count: 15, deletions_count: 14}, + {sha: "cb4a60a20ccbc2301850dbc8a0d513334a880bf1", date: "2026-02-27 20:06:39 UTC", description: "Export some transform config types", pr_number: 24776, scopes: ["transforms"], type: "chore", breaking_change: false, author: "Bruce Guenter", files_count: 4, insertions_count: 54, deletions_count: 63}, + {sha: "63acf11933309335982052e0109f07d961bfc388", date: "2026-02-28 05:59:35 UTC", description: "advanced syslog Structured Data & RFC compliance fixes", pr_number: 24662, scopes: ["codecs"], type: "enhancement", breaking_change: false, author: "Vitalii Parfonov", files_count: 3, insertions_count: 458, deletions_count: 77}, + {sha: "ff4ebc74d4b069cd8e30e21c2b64f9c93afb67e4", date: "2026-02-27 23:47:28 UTC", description: "simplify publish workflow by consolidating duplicated jobs", pr_number: 24778, scopes: ["ci"], type: "chore", breaking_change: false, author: "Thomas", files_count: 2, insertions_count: 45, deletions_count: 422}, + {sha: "0a32a064f9c705cdd663c6314e2fcf9278a29c37", 
date: "2026-02-28 05:29:11 UTC", description: "bump memchr from 2.7.5 to 2.8.0", pr_number: 24755, scopes: ["deps"], type: "chore", breaking_change: false, author: "dependabot[bot]", files_count: 1, insertions_count: 2, deletions_count: 2}, + {sha: "1dd2b998ad7704e4a2d8bdf6b6788c3d12893565", date: "2026-02-28 05:32:52 UTC", description: "bump minimatch from 3.1.2 to 3.1.5 in /website", pr_number: 24783, scopes: ["website deps"], type: "chore", breaking_change: false, author: "dependabot[bot]", files_count: 1, insertions_count: 3, deletions_count: 3}, + {sha: "4f026ab4903c200133d09e071eab3136f1eb23e0", date: "2026-02-28 05:37:50 UTC", description: "bump derive_more from 2.0.1 to 2.1.1", pr_number: 24744, scopes: ["deps"], type: "chore", breaking_change: false, author: "dependabot[bot]", files_count: 2, insertions_count: 18, deletions_count: 8}, + {sha: "fb645a43e6c2b6b7a9d93bae8b4bcd4615133745", date: "2026-02-28 05:41:39 UTC", description: "bump evmap from 10.0.2 to 11.0.0", pr_number: 24754, scopes: ["deps"], type: "chore", breaking_change: false, author: "dependabot[bot]", files_count: 1, insertions_count: 42, deletions_count: 3}, + {sha: "660e92d553c966f57daa471447ceb3048f24aa91", date: "2026-02-28 05:47:46 UTC", description: "bump smpl_jwt from 0.8.0 to 0.9.0", pr_number: 24757, scopes: ["deps"], type: "chore", breaking_change: false, author: "dependabot[bot]", files_count: 1, insertions_count: 18, deletions_count: 2}, + {sha: "8dfd20d1a4dd2b8f4618f5fd4d296a5caf4c66c2", date: "2026-02-28 06:04:41 UTC", description: "bump bytesize from 2.1.0 to 2.3.1", pr_number: 24758, scopes: ["deps"], type: "chore", breaking_change: false, author: "dependabot[bot]", files_count: 1, insertions_count: 2, deletions_count: 2}, + {sha: "c788f9359dfca3b2363ca99a565a3334455541da", date: "2026-02-28 02:24:47 UTC", description: "restrict GITHUB_TOKEN permissions in workflows", pr_number: 24785, scopes: ["ci"], type: "fix", breaking_change: false, author: "Pavlos Rontidis", 
files_count: 4, insertions_count: 16, deletions_count: 6}, + {sha: "33057c8db8a348bc9dca9fb26ed931510a3fcb25", date: "2026-03-03 01:27:01 UTC", description: "expose vrl functions flag", pr_number: 24630, scopes: ["deps"], type: "chore", breaking_change: false, author: "dd-sebastien-lb", files_count: 2, insertions_count: 8, deletions_count: 3}, + {sha: "722586174cdac164249c1939868a0078b7bd90b2", date: "2026-03-03 00:42:05 UTC", description: "update Cargo.lock", pr_number: 24825, scopes: ["deps"], type: "chore", breaking_change: false, author: "Pavlos Rontidis", files_count: 1, insertions_count: 5, deletions_count: 60}, + {sha: "8b603ddfc38e18c94d7d346c49042f057dfdacae", date: "2026-03-03 08:17:29 UTC", description: "support directory paths with path separators in secret keys", pr_number: 24824, scopes: ["security"], type: "fix", breaking_change: false, author: "Vitalii Parfonov", files_count: 5, insertions_count: 28, deletions_count: 3}, + {sha: "d90916abfd639af236a501cfaf26d7f6e3b8e3e0", date: "2026-03-03 01:22:12 UTC", description: "Delete obsolete LLVM/clang 9 RUSTFLAGS step", pr_number: 24826, scopes: ["internal docs"], type: "chore", breaking_change: false, author: "Thomas", files_count: 1, insertions_count: 1, deletions_count: 8}, + {sha: "0b15473a3329819fa538340486309d95571227b2", date: "2026-03-03 01:28:15 UTC", description: "add new component docs guide", pr_number: 24823, scopes: ["internal"], type: "docs", breaking_change: false, author: "Pavlos Rontidis", files_count: 2, insertions_count: 42, deletions_count: 5}, + {sha: "c0fc69ecc3ca6eba0f7601e578c3dc8306f10031", date: "2026-03-03 02:03:15 UTC", description: "specify hugo version", pr_number: 24829, scopes: ["external"], type: "docs", breaking_change: false, author: "Pavlos Rontidis", files_count: 1, insertions_count: 2, deletions_count: 1}, + {sha: "f78d95d59e0a1832b0577bc6945a7187a3c0b789", date: "2026-03-03 21:56:13 UTC", description: "various agents md updates", pr_number: 24832, scopes: ["dev"], 
type: "chore", breaking_change: false, author: "Pavlos Rontidis", files_count: 3, insertions_count: 122, deletions_count: 103}, + {sha: "56dbc78d9d9176ac06b98d875ab64997e8083413", date: "2026-03-04 06:35:55 UTC", description: "bump the patches group with 4 updates", pr_number: 24788, scopes: ["deps"], type: "chore", breaking_change: false, author: "dependabot[bot]", files_count: 2, insertions_count: 22, deletions_count: 22}, + {sha: "7e97ead1775c1cadd882ca176aa2497d8b7ee1b5", date: "2026-03-04 00:16:26 UTC", description: "implement least privilege for GitHub Actions token permissions", pr_number: 24835, scopes: ["ci"], type: "chore", breaking_change: false, author: "Benson Fung", files_count: 20, insertions_count: 121, deletions_count: 32}, + {sha: "ecd132d6cdde1c7e9cea506cbbd37d7af871d116", date: "2026-03-04 21:40:57 UTC", description: "Bump VRL and add check_type_only: false", pr_number: 24836, scopes: ["deps"], type: "chore", breaking_change: false, author: "Thomas", files_count: 6, insertions_count: 10, deletions_count: 9}, + {sha: "51c04e029e1a81c5f036dc14ba0deeb22d5ecb20", date: "2026-03-05 01:10:59 UTC", description: "stop printing literal escaped ANSI codes to output", pr_number: 24843, scopes: ["unit tests"], type: "fix", breaking_change: false, author: "Thomas", files_count: 4, insertions_count: 60, deletions_count: 3}, + {sha: "11cd0a573886db17bb0688063fd91efdde7882c2", date: "2026-03-05 02:20:42 UTC", description: "Enable all vector-vrl-functions features by default", pr_number: 24845, scopes: ["dev"], type: "chore", breaking_change: false, author: "Thomas", files_count: 3, insertions_count: 3, deletions_count: 3}, + {sha: "9ff9d838a5a88a1b0e660ddfcd65283a1fdc1543", date: "2026-03-05 02:21:33 UTC", description: "explicitly enable preserve_order feature for serde_json", pr_number: 24846, scopes: ["deps"], type: "chore", breaking_change: false, author: "Thomas", files_count: 1, insertions_count: 1, deletions_count: 1}, + {sha: 
"7091366c9946363a57f3d6f37e40d9b1822710a4", date: "2026-03-05 02:54:56 UTC", description: "fix source output", pr_number: 24847, scopes: ["opentelemetry source"], type: "fix", breaking_change: false, author: "Pavlos Rontidis", files_count: 2, insertions_count: 12, deletions_count: 1}, + {sha: "19edb2578b280f6b2ea40d334d75756c55affd16", date: "2026-03-05 18:53:21 UTC", description: "bump the artifact group with 2 updates", pr_number: 24820, scopes: ["ci"], type: "chore", breaking_change: false, author: "dependabot[bot]", files_count: 8, insertions_count: 29, deletions_count: 29}, + {sha: "6c158da72b870f17e7adf7aa07175b8408695fdd", date: "2026-03-05 20:14:41 UTC", description: "fix aggregate_vector_metrics docs and improve enrichment explainer", pr_number: 24849, scopes: ["vrl"], type: "docs", breaking_change: false, author: "Thomas", files_count: 2, insertions_count: 3, deletions_count: 3}, + {sha: "a175af1532b7489c43932fac0d612380a3888dfb", date: "2026-03-06 00:57:57 UTC", description: "fix website token permissions", pr_number: 24853, scopes: ["ci"], type: "fix", breaking_change: false, author: "Thomas", files_count: 1, insertions_count: 3, deletions_count: 0}, + {sha: "f119e7883af91d77e711c740316efaa04397604f", date: "2026-03-06 01:35:00 UTC", description: "add disk space cleanup to component features workflow", pr_number: 24852, scopes: ["ci"], type: "fix", breaking_change: false, author: "Pavlos Rontidis", files_count: 1, insertions_count: 4, deletions_count: 0}, + {sha: "3037b0c55f2d4407291260f914b2d4e82d2b682c", date: "2026-03-06 02:14:01 UTC", description: "move VRL-specific crates under lib/vector-vrl/", pr_number: 24854, scopes: ["dev"], type: "chore", breaking_change: false, author: "Thomas", files_count: 29, insertions_count: 19, deletions_count: 19}, + {sha: "664a0a2cb897bacae2b5f13455626506f85b6072", date: "2026-03-06 02:51:26 UTC", description: "remove ux-team", pr_number: 24850, scopes: ["dev"], type: "chore", breaking_change: false, author: "Pavlos 
Rontidis", files_count: 1, insertions_count: 2, deletions_count: 2}, + {sha: "71b993593e28dfc0d2a787d6616fd28733ef19ae", date: "2026-03-06 20:42:44 UTC", description: "remove gardener workflows", pr_number: 24857, scopes: ["ci"], type: "chore", breaking_change: false, author: "Pavlos Rontidis", files_count: 4, insertions_count: 0, deletions_count: 204}, + {sha: "3f17b6f055b84721539c945e5c4f7d5cc826e55c", date: "2026-03-06 21:09:40 UTC", description: "document api as a global option", pr_number: 24858, scopes: ["website"], type: "docs", breaking_change: false, author: "Thomas", files_count: 5, insertions_count: 47, deletions_count: 72}, + {sha: "66e531e3080c96dda03d0ae5ac2845ad98fce728", date: "2026-03-06 21:27:55 UTC", description: "tighten changelog workflow security", pr_number: 24859, scopes: ["ci"], type: "fix", breaking_change: false, author: "Pavlos Rontidis", files_count: 1, insertions_count: 4, deletions_count: 23}, + {sha: "c65983531e90ed95054477880505feba03dbf9a2", date: "2026-03-07 01:03:28 UTC", description: "update npm CI packages", pr_number: 24861, scopes: ["deps"], type: "chore", breaking_change: false, author: "Thomas", files_count: 1, insertions_count: 2, deletions_count: 2}, + {sha: "40e82911e30e798efa497c8149d31db99ae1b729", date: "2026-03-07 01:53:19 UTC", description: "update lading to 0.31.2", pr_number: 24855, scopes: ["ci"], type: "chore", breaking_change: false, author: "George Hahn", files_count: 1, insertions_count: 1, deletions_count: 1}, + {sha: "70a8bfaa65532566c71027246bc1881ea3f7bb2f", date: "2026-03-07 03:01:36 UTC", description: "remove docker dependency from deb/rpm package targets", pr_number: 24864, scopes: ["ci"], type: "chore", breaking_change: false, author: "Thomas", files_count: 4, insertions_count: 14, deletions_count: 15}, + {sha: "1fe79946ecf66cc8bd050b24882edded08b79dbd", date: "2026-03-07 03:15:14 UTC", description: "use VDEV env var in scripts", pr_number: 24862, scopes: ["ci"], type: "chore", breaking_change: false, 
author: "Thomas", files_count: 11, insertions_count: 41, deletions_count: 15}, + {sha: "46a7035d7630a804fc9865d798319e2707816d4f", date: "2026-03-07 07:35:28 UTC", description: "revert markdownlint version bump", pr_number: 24867, scopes: ["deps"], type: "chore", breaking_change: false, author: "Thomas", files_count: 1, insertions_count: 1, deletions_count: 1}, + {sha: "2debb993f31a39968334236bd58e36d6504ee9b0", date: "2026-03-07 19:54:34 UTC", description: "update SMP CLI to 0.26.1", pr_number: 24865, scopes: ["ci"], type: "chore", breaking_change: false, author: "George Hahn", files_count: 1, insertions_count: 1, deletions_count: 1}, + {sha: "b519edd97252ae1dab8210cfc3d164dadee16c9e", date: "2026-03-07 23:41:20 UTC", description: "Automatically generate VRL function documentation", pr_number: 24719, scopes: ["vrl"], type: "docs", breaking_change: false, author: "Thomas", files_count: 240, insertions_count: 1263, deletions_count: 11380}, + {sha: "8dec725817de08b20ac834f5b281d25632e0de09", date: "2026-03-09 19:30:55 UTC", description: "replace check-component-docs with check-generated-docs", pr_number: 24871, scopes: ["internal docs"], type: "fix", breaking_change: false, author: "Thomas", files_count: 2, insertions_count: 7, deletions_count: 7}, + {sha: "c4c802270f75e5674cb6476a32490dceac778732", date: "2026-03-09 21:00:14 UTC", description: "Bump version to 0.3.0", pr_number: 24872, scopes: ["vdev"], type: "chore", breaking_change: false, author: "Thomas", files_count: 3, insertions_count: 3, deletions_count: 3}, + {sha: "79999f6cf8d482e4264c82f4d215a99d88dae8c6", date: "2026-03-09 22:39:27 UTC", description: "render all top level configuration fields", pr_number: 24863, scopes: ["external docs"], type: "fix", breaking_change: false, author: "Pavlos Rontidis", files_count: 22, insertions_count: 1755, deletions_count: 943}, + {sha: "f2c50cbad476eaf8b0679f19354188b60bb2affb", date: "2026-03-09 23:01:50 UTC", description: "add changes job to integration-test-suite 
needs to catch cancellations", pr_number: 24875, scopes: ["ci"], type: "fix", breaking_change: false, author: "Thomas", files_count: 1, insertions_count: 1, deletions_count: 0}, + ] +} diff --git a/website/cue/reference/remap.cue b/website/cue/reference/remap.cue index 93aa2e0d074af..fdbf88a259e48 100644 --- a/website/cue/reference/remap.cue +++ b/website/cue/reference/remap.cue @@ -13,13 +13,11 @@ package metadata name: Name } - #Example: { + #BaseExample: { title: string - input?: #Event source: string diff?: string return?: _ - output?: #Event | [#Event, ...#Event] raises?: _ notes?: [string, ...string] @@ -29,6 +27,17 @@ package metadata skip_test?: bool } + #Example: { + #BaseExample + input?: #Event + output?: #Event | [#Event, ...#Event] + } + + #FunctionExample: { + #BaseExample + input?: {...} + } + #Type: "any" | "array" | "boolean" | "float" | "integer" | "object" | "null" | "path" | "string" | "regex" | "timestamp" concepts: _ diff --git a/website/cue/reference/remap/functions.cue b/website/cue/reference/remap/functions.cue index 62df2d555f883..90b551ada515c 100644 --- a/website/cue/reference/remap/functions.cue +++ b/website/cue/reference/remap/functions.cue @@ -23,7 +23,7 @@ remap: { rules?: [string, ...string] } internal_failure_reasons: [...string] - examples?: [remap.#Example, ...remap.#Example] + examples?: [remap.#FunctionExample, ...remap.#FunctionExample] deprecated: bool | *false pure: bool | *true } @@ -58,69 +58,4 @@ remap: { functions: [Name=string]: #Function & { name: Name } - - // Reusable text - _enrichment_table_explainer: """ - For `file` enrichment tables, this condition needs to be a VRL object in which - the key-value pairs indicate a field to search mapped to a value to search in that field. - This function returns the rows that match the provided condition(s). _All_ fields need to - match for rows to be returned; if any fields do not match, then no rows are returned. - - There are currently three forms of search criteria: - - 1. 
**Exact match search**. The given field must match the value exactly. Case sensitivity - can be specified using the `case_sensitive` argument. An exact match search can use an - index directly into the dataset, which should make this search fairly "cheap" from a - performance perspective. - - 2. **Wildcard match search**. The given fields specified by the exact match search may also - be matched exactly to the value provided to the `wildcard` parameter. - A wildcard match search can also use an index directly into the dataset. - - 3. **Date range search**. The given field must be greater than or equal to the `from` date - and/or less than or equal to the `to` date. A date range search involves - sequentially scanning through the rows that have been located using any exact match - criteria. This can be an expensive operation if there are many rows returned by any exact - match criteria. Therefore, use date ranges as the _only_ criteria when the enrichment - data set is very small. - - For `geoip` and `mmdb` enrichment tables, this condition needs to be a VRL object with a single key-value pair - whose value needs to be a valid IP address. Example: `{"ip": .ip }`. If a return field is expected - and without a value, `null` is used. This table can return the following fields: - - * ISP databases: - * `autonomous_system_number` - * `autonomous_system_organization` - * `isp` - * `organization` - - * City databases: - * `city_name` - * `continent_code` - * `country_code` - * `country_name` - * `region_code` - * `region_name` - * `metro_code` - * `latitude` - * `longitude` - * `postal_code` - * `timezone` - - * Connection-Type databases: - * `connection_type` - - To use this function, you need to update your configuration to - include an - [`enrichment_tables`](\(urls.vector_configuration_global)/#enrichment_tables) - parameter. - """ - - _vector_metrics_explainer: """ - Internal Vector metrics functions work with a snapshot of the metrics. 
The interval at which - the snapshot is updated is controlled through the - [`metrics_storage_refresh_period`](\(urls.vector_configuration_global)/#metrics_storage_refresh_period) - global option. Higher values can reduce performance impact of that process, but may cause - stale metrics data in the snapshot. - """ } diff --git a/website/cue/reference/remap/functions/.gitignore b/website/cue/reference/remap/functions/.gitignore new file mode 100644 index 0000000000000..89cd6fdc6b606 --- /dev/null +++ b/website/cue/reference/remap/functions/.gitignore @@ -0,0 +1 @@ +generated.cue diff --git a/website/cue/reference/remap/functions/abs.cue b/website/cue/reference/remap/functions/abs.cue deleted file mode 100644 index f8aad893be3a9..0000000000000 --- a/website/cue/reference/remap/functions/abs.cue +++ /dev/null @@ -1,41 +0,0 @@ -package metadata - -remap: functions: abs: { - category: "Number" - description: """ - Computes the absolute value of `value`. - """ - - arguments: [ - { - name: "value" - description: "The number to calculate the absolute value." 
- required: true - type: ["integer", "float"] - }, - ] - internal_failure_reasons: [] - return: { - types: ["integer", "float"] - rules: [ - "Returns the absolute value.", - ] - } - - examples: [ - { - title: "Computes the absolute value of the integer" - source: #""" - abs(-42) - """# - return: 42 - }, - { - title: "Computes the absolute value of the float" - source: #""" - abs(-42.2) - """# - return: 42.2 - }, - ] -} diff --git a/website/cue/reference/remap/functions/aggregate_vector_metrics.cue b/website/cue/reference/remap/functions/aggregate_vector_metrics.cue deleted file mode 100644 index cdf2c0141dd70..0000000000000 --- a/website/cue/reference/remap/functions/aggregate_vector_metrics.cue +++ /dev/null @@ -1,82 +0,0 @@ -package metadata - -remap: functions: aggregate_vector_metric: { - category: "Metrics" - description: """ - Aggregates internal Vector metrics, using one of 4 aggregation functions, filtering by name - and optionally by tags. Returns the aggregated value. Only includes counter and gauge metrics. - - \(remap._vector_metrics_explainer) - """ - - arguments: [ - { - name: "function" - description: "The metric name to search." - required: true - type: ["string"] - enum: { - sum: "Sum the values of all the matched metrics" - avg: "Find the average of the values of all the matched metrics" - max: "Find the highest metric value of all the matched metrics" - min: "Find the lowest metric value of all the matched metrics" - } - }, - { - name: "key" - description: "The metric name to aggregate." - required: true - type: ["string"] - }, - { - name: "tags" - description: """ - Tags to filter the results on. Values in this object support wildcards ('*') to - match on parts of the tag value. 
- """ - required: false - type: ["object"] - }, - ] - internal_failure_reasons: [] - return: types: ["float"] - - examples: [ - { - title: "Sum vector internal metrics matching the name" - source: #""" - aggregate_vector_metrics("sum", "utilization") - """# - return: 0.5 - }, - { - - title: "Sum vector internal metrics matching the name and tags" - source: #""" - aggregate_vector_metrics("sum", "utilization", tags: {"component_id": "test"}) - """# - return: 0.5 - }, - { - title: "Average of vector internal metrics matching the name" - source: #""" - aggregate_vector_metrics("avg", "utilization") - """# - return: 0.5 - }, - { - title: "Max of vector internal metrics matching the name" - source: #""" - aggregate_vector_metrics("max", "utilization") - """# - return: 0.5 - }, - { - title: "Min of vector internal metrics matching the name" - source: #""" - aggregate_vector_metrics("max", "utilization") - """# - return: 0.5 - }, - ] -} diff --git a/website/cue/reference/remap/functions/append.cue b/website/cue/reference/remap/functions/append.cue deleted file mode 100644 index 289c62296cd40..0000000000000 --- a/website/cue/reference/remap/functions/append.cue +++ /dev/null @@ -1,35 +0,0 @@ -package metadata - -remap: functions: append: { - category: "Array" - description: """ - Appends each item in the `items` array to the end of the `value` array. - """ - - arguments: [ - { - name: "value" - description: "The initial array." - required: true - type: ["array"] - }, - { - name: "items" - description: "The items to append." 
- required: true - type: ["array"] - }, - ] - internal_failure_reasons: [] - return: types: ["array"] - - examples: [ - { - title: "Append to an array" - source: """ - append([1, 2], [3, 4]) - """ - return: [1, 2, 3, 4] - }, - ] -} diff --git a/website/cue/reference/remap/functions/array.cue b/website/cue/reference/remap/functions/array.cue deleted file mode 100644 index 9a249ce918dc0..0000000000000 --- a/website/cue/reference/remap/functions/array.cue +++ /dev/null @@ -1,38 +0,0 @@ -package metadata - -remap: functions: array: { - category: "Type" - description: """ - Returns `value` if it is an array, otherwise returns an error. This enables the type checker to guarantee that the - returned value is an array and can be used in any function that expects an array. - """ - - arguments: [ - { - name: "value" - description: "The value to check if it is an array." - required: true - type: ["any"] - }, - ] - internal_failure_reasons: [ - "`value` is not an array.", - ] - return: { - types: ["array"] - rules: [ - #"Returns the `value` if it's an array."#, - #"Raises an error if not an array."#, - ] - } - examples: [ - { - title: "Declare an array type" - input: log: value: [1, 2, 3] - source: #""" - array!(.value) - """# - return: input.log.value - }, - ] -} diff --git a/website/cue/reference/remap/functions/assert.cue b/website/cue/reference/remap/functions/assert.cue deleted file mode 100644 index 6e7d905ac425b..0000000000000 --- a/website/cue/reference/remap/functions/assert.cue +++ /dev/null @@ -1,58 +0,0 @@ -package metadata - -remap: functions: assert: { - category: "Debug" - description: """ - Asserts the `condition`, which must be a Boolean expression. The program is aborted with - `message` if the condition evaluates to `false`. - """ - notices: [ - """ - The `assert` function should be used in a standalone fashion and only when you want to abort the program. 
You - should avoid it in logical expressions and other situations in which you want the program to continue if the - condition evaluates to `false`. - """, - ] - - pure: false - - arguments: [ - { - name: "condition" - description: "The condition to check." - required: true - type: ["boolean"] - }, - { - name: "message" - description: """ - An optional custom error message. If the equality assertion fails, `message` is - appended to the default message prefix. See the [examples](#assert-examples) below - for a fully formed log message sample. - """ - required: false - type: ["string"] - }, - ] - internal_failure_reasons: [ - "`condition` evaluates to `false`.", - ] - return: types: ["boolean"] - - examples: [ - { - title: "Assertion (true)" - source: #""" - assert!("foo" == "foo", message: "\"foo\" must be \"foo\"!") - """# - return: true - }, - { - title: "Assertion (false)" - source: #""" - assert!("foo" == "bar", message: "\"foo\" must be \"foo\"!") - """# - raises: runtime: #"function call error for "assert" at (0:60): "foo" must be "foo"!"# - }, - ] -} diff --git a/website/cue/reference/remap/functions/assert_eq.cue b/website/cue/reference/remap/functions/assert_eq.cue deleted file mode 100644 index 97f172470122f..0000000000000 --- a/website/cue/reference/remap/functions/assert_eq.cue +++ /dev/null @@ -1,69 +0,0 @@ -package metadata - -remap: functions: assert_eq: { - category: "Debug" - - description: """ - Asserts that two expressions, `left` and `right`, have the same value. The program is - aborted with `message` if they do not have the same value. - """ - - notices: [ - """ - The `assert_eq` function should be used in a standalone fashion and only when you want to - abort the program. You should avoid it in logical expressions and other situations in which - you want the program to continue if the condition evaluates to `false`. - """, - ] - - pure: false - - arguments: [ - { - name: "left" - description: "The value to check for equality against `right`." 
- required: true - type: ["any"] - }, - { - name: "right" - description: "The value to check for equality against `left`." - required: true - type: ["any"] - }, - { - name: "message" - description: """ - An optional custom error message. If the equality assertion fails, `message` is - appended to the default message prefix. See the [examples](#assert_eq-examples) - below for a fully formed log message sample. - """ - required: false - type: ["string"] - }, - ] - - internal_failure_reasons: [] - - return: types: ["boolean"] - - examples: [ - { - title: "Successful assertion" - source: "assert_eq!(1, 1)" - return: true - }, - { - title: "Unsuccessful assertion" - source: "assert_eq!(127, [1, 2, 3])" - raises: runtime: #"function call error for "assert_eq" at (0:26): assertion failed: 127 == [1, 2, 3]"# - }, - { - title: "Unsuccessful assertion with custom log message" - source: #""" - assert_eq!(1, 0, message: "Unequal integers") - """# - raises: runtime: #"function call error for "assert_eq" at (1:46): Unequal integers"# - }, - ] -} diff --git a/website/cue/reference/remap/functions/basename.cue b/website/cue/reference/remap/functions/basename.cue deleted file mode 100644 index 1d0c4d88eb315..0000000000000 --- a/website/cue/reference/remap/functions/basename.cue +++ /dev/null @@ -1,53 +0,0 @@ -package metadata - -remap: functions: basename: { - category: "String" - description: """ - Returns the filename component of the given `path`. This is similar to the Unix `basename` command. - If the path ends in a directory separator, the function returns the name of the directory. - """ - - arguments: [ - { - name: "value" - description: "The path from which to extract the basename." 
- required: true - type: ["string"] - }, - ] - internal_failure_reasons: [ - "`value` is not a valid string.", - ] - return: types: ["string", "null"] - - examples: [ - { - title: "Extract basename from file path" - source: """ - basename!("/usr/local/bin/vrl") - """ - return: "vrl" - }, - { - title: "Extract basename from file path with extension" - source: """ - basename!("/home/user/file.txt") - """ - return: "file.txt" - }, - { - title: "Extract basename from directory path" - source: """ - basename!("/home/user/") - """ - return: "user" - }, - { - title: "Root directory has no basename" - source: """ - basename!("/") - """ - return: null - }, - ] -} diff --git a/website/cue/reference/remap/functions/bool.cue b/website/cue/reference/remap/functions/bool.cue deleted file mode 100644 index e7fa4398e5969..0000000000000 --- a/website/cue/reference/remap/functions/bool.cue +++ /dev/null @@ -1,38 +0,0 @@ -package metadata - -remap: functions: bool: { - category: "Type" - description: """ - Returns `value` if it is a Boolean, otherwise returns an error. This enables the type checker to guarantee that the - returned value is a Boolean and can be used in any function that expects a Boolean. - """ - - arguments: [ - { - name: "value" - description: "The value to check if it is a Boolean." 
- required: true - type: ["any"] - }, - ] - internal_failure_reasons: [ - "`value` is not a Boolean.", - ] - return: { - types: ["boolean"] - rules: [ - #"Returns `value` if it's a Boolean."#, - #"Raises an error if not a Boolean."#, - ] - } - examples: [ - { - title: "Declare a Boolean type" - input: log: value: false - source: #""" - bool!(.value) - """# - return: input.log.value - }, - ] -} diff --git a/website/cue/reference/remap/functions/camelcase.cue b/website/cue/reference/remap/functions/camelcase.cue deleted file mode 100644 index 9c8eb246ec617..0000000000000 --- a/website/cue/reference/remap/functions/camelcase.cue +++ /dev/null @@ -1,43 +0,0 @@ -package metadata - -remap: functions: camelcase: { - category: "String" - description: """ - Takes the `value` string, and turns it into camelCase. Optionally, you can - pass in the existing case of the function, or else an attempt is made to determine the case automatically. - """ - - arguments: [ - { - name: "value" - description: "The string to convert to camelCase." - required: true - type: ["string"] - }, - { - name: "original_case" - description: "Optional hint on the original case type. 
Must be one of: kebab-case, camelCase, PascalCase, SCREAMING_SNAKE, snake_case" - required: false - type: ["string"] - }, - ] - internal_failure_reasons: [] - return: types: ["string"] - - examples: [ - { - title: "camelCase a string" - source: #""" - camelcase("input-string") - """# - return: "inputString" - }, - { - title: "camelCase a string" - source: #""" - camelcase("input-string", "kebab-case") - """# - return: "inputString" - }, - ] -} diff --git a/website/cue/reference/remap/functions/ceil.cue b/website/cue/reference/remap/functions/ceil.cue deleted file mode 100644 index 8b7175ed60dc7..0000000000000 --- a/website/cue/reference/remap/functions/ceil.cue +++ /dev/null @@ -1,48 +0,0 @@ -package metadata - -remap: functions: ceil: { - category: "Number" - description: """ - Rounds the `value` up to the specified `precision`. - """ - - arguments: [ - { - name: "value" - description: "The number to round up." - required: true - type: ["integer", "float"] - }, - { - name: "precision" - description: "The number of decimal places to round to." - required: false - default: 0 - type: ["integer"] - }, - ] - internal_failure_reasons: [] - return: { - types: ["integer", "float"] - rules: [ - "Returns an integer if `precision` is `0` (this is the default). Returns a float otherwise.", - ] - } - - examples: [ - { - title: "Round a number up (without precision)" - source: #""" - ceil(4.345) - """# - return: 5.0 - }, - { - title: "Round a number up (with precision)" - source: #""" - ceil(4.345, precision: 2) - """# - return: 4.35 - }, - ] -} diff --git a/website/cue/reference/remap/functions/chunks.cue b/website/cue/reference/remap/functions/chunks.cue deleted file mode 100644 index 3ad4cf375badf..0000000000000 --- a/website/cue/reference/remap/functions/chunks.cue +++ /dev/null @@ -1,50 +0,0 @@ -package metadata - -remap: functions: chunks: { - category: "Array" - description: """ - Chunks `value` into slices of length `chunk_size` bytes. 
- """ - - arguments: [ - { - name: "value" - description: "The array of bytes to split." - required: true - type: ["array", "string"] - }, - { - name: "chunk_size" - description: "The desired length of each chunk in bytes. This may be constrained by the host platform architecture." - required: true - type: ["integer"] - }, - ] - internal_failure_reasons: [ - "`chunk_size` must be at least 1 byte.", - "`chunk_size` is too large.", - ] - return: { - types: ["array"] - rules: [ - "`chunks` is considered fallible if the supplied `chunk_size` is an expression, and infallible if it's a literal integer.", - ] - } - - examples: [ - { - title: "Split a string into chunks" - source: #""" - chunks("abcdefgh", 4) - """# - return: ["abcd", "efgh"] - }, - { - title: "Chunks do not respect unicode code point boundaries" - source: #""" - chunks("ab你好", 4) - """# - return: ["ab�", "�好"] - }, - ] -} diff --git a/website/cue/reference/remap/functions/community_id.cue b/website/cue/reference/remap/functions/community_id.cue deleted file mode 100644 index f03541319d158..0000000000000 --- a/website/cue/reference/remap/functions/community_id.cue +++ /dev/null @@ -1,59 +0,0 @@ -package metadata - -remap: functions: community_id: { - category: "String" - description: """ - Generates an ID based on the [Community ID Spec](\(urls.community_id_spec)). - """ - - arguments: [ - { - name: "source_ip" - description: "The source IP address." - required: true - type: ["string"] - }, - { - name: "destination_ip" - description: "The destination IP address." - required: true - type: ["string"] - }, - { - name: "protocol" - description: "The protocol number." - required: true - type: ["integer"] - }, - { - name: "source_port" - description: "The source port or ICMP type." - required: false - type: ["integer"] - }, - { - name: "destination_port" - description: "The destination port or ICMP code." - required: false - type: ["integer"] - }, - { - name: "seed" - description: "The custom seed number." 
- required: false - type: ["integer"] - }, - ] - internal_failure_reasons: [] - return: types: ["string"] - - examples: [ - { - title: "TCP" - source: #""" - community_id!(source_ip: "1.2.3.4", destination_ip: "5.6.7.8", source_port: 1122, destination_port: 3344, protocol: 6) - """# - return: "1:wCb3OG7yAFWelaUydu0D+125CLM=" - }, - ] -} diff --git a/website/cue/reference/remap/functions/compact.cue b/website/cue/reference/remap/functions/compact.cue deleted file mode 100644 index d64f0104064bd..0000000000000 --- a/website/cue/reference/remap/functions/compact.cue +++ /dev/null @@ -1,83 +0,0 @@ -package metadata - -remap: functions: compact: { - category: "Enumerate" - description: """ - Compacts the `value` by removing empty values, where empty values are defined using the - available parameters. - """ - - arguments: [ - { - name: "value" - description: "The object or array to compact." - required: true - type: ["array", "object"] - }, - { - name: "recursive" - description: "Whether the compaction be recursive." - required: false - default: true - type: ["boolean"] - }, - { - name: "null" - description: "Whether null should be treated as an empty value." - required: false - default: true - type: ["boolean"] - }, - { - name: "string" - description: "Whether an empty string should be treated as an empty value." - required: false - default: true - type: ["boolean"] - }, - { - name: "object" - description: "Whether an empty object should be treated as an empty value." - required: false - default: true - type: ["boolean"] - }, - { - name: "array" - description: "Whether an empty array should be treated as an empty value." 
- required: false - default: true - type: ["boolean"] - }, - { - name: "nullish" - description: #"Tests whether the value is "nullish" as defined by the [`is_nullish`](#is_nullish) function."# - required: false - default: false - type: ["boolean"] - }, - ] - internal_failure_reasons: [] - return: { - types: ["array", "object"] - rules: [ - "The return type matches the `value` type.", - ] - } - examples: [ - { - title: "Compact an array" - source: #""" - compact(["foo", "bar", "", null, [], "buzz"], string: true, array: true, null: true) - """# - return: ["foo", "bar", "buzz"] - }, - { - title: "Compact an object" - source: #""" - compact({"field1": 1, "field2": "", "field3": [], "field4": null}, string: true, array: true, null: true) - """# - return: field1: 1 - }, - ] -} diff --git a/website/cue/reference/remap/functions/contains.cue b/website/cue/reference/remap/functions/contains.cue deleted file mode 100644 index c12d36edb5df5..0000000000000 --- a/website/cue/reference/remap/functions/contains.cue +++ /dev/null @@ -1,49 +0,0 @@ -package metadata - -remap: functions: contains: { - category: "String" - description: """ - Determines whether the `value` string contains the specified `substring`. - """ - - arguments: [ - { - name: "value" - description: "The text to search." - required: true - type: ["string"] - }, - { - name: "substring" - description: "The substring to search for in `value`." - required: true - type: ["string"] - }, - { - name: "case_sensitive" - description: "Whether the match should be case sensitive." 
- required: false - type: ["boolean"] - default: true - }, - ] - internal_failure_reasons: [] - return: types: ["boolean"] - - examples: [ - { - title: "String contains (case sensitive)" - source: #""" - contains("The Needle In The Haystack", "Needle") - """# - return: true - }, - { - title: "String contains (case insensitive)" - source: #""" - contains("The Needle In The Haystack", "needle", case_sensitive: false) - """# - return: true - }, - ] -} diff --git a/website/cue/reference/remap/functions/contains_all.cue b/website/cue/reference/remap/functions/contains_all.cue deleted file mode 100644 index dfd7cf07eeb78..0000000000000 --- a/website/cue/reference/remap/functions/contains_all.cue +++ /dev/null @@ -1,49 +0,0 @@ -package metadata - -remap: functions: contains_all: { - category: "String" - description: """ - Determines whether the `value` string contains all the specified `substrings`. - """ - - arguments: [ - { - name: "value" - description: "The text to search." - required: true - type: ["string"] - }, - { - name: "substrings" - description: "An array of substrings to search for in `value`." - required: true - type: ["array"] - }, - { - name: "case_sensitive" - description: "Whether the match should be case sensitive." 
- required: false - type: ["boolean"] - }, - ] - internal_failure_reasons: [] - return: types: ["boolean"] - - examples: [ - { - title: "String contains all" - source: #""" - contains_all("The Needle In The Haystack", ["Needle", "Haystack"]) - """# - return: true - }, - { - title: "String contains all (case sensitive)" - source: #""" - contains_all("the NEEDLE in the haystack", ["needle", "haystack"]) - """# - return: false - }, - ] - -} diff --git a/website/cue/reference/remap/functions/crc.cue b/website/cue/reference/remap/functions/crc.cue deleted file mode 100644 index d42e6faf5fd70..0000000000000 --- a/website/cue/reference/remap/functions/crc.cue +++ /dev/null @@ -1,164 +0,0 @@ -package metadata - -remap: functions: crc: { - category: "Checksum" - description: """ - Calculates a CRC of the `value`. - The CRC `algorithm` used can be optionally specified. - - This function is infallible if either the default `algorithm` value or a recognized-valid compile-time - `algorithm` string literal is used. Otherwise, it is fallible. - """ - - arguments: [ - { - name: "value" - description: "The string to calculate the checksum for." - required: true - type: ["string"] - }, - { - name: "algorithm" - description: "The CRC algorithm to use." 
- enum: { - "CRC_3_GSM": "3-bit CRC used in GSM telecommunications for error detection" - "CRC_3_ROHC": "3-bit CRC used in Robust Header Compression (ROHC) protocol" - "CRC_4_G_704": "4-bit CRC specified in ITU-T G.704 for synchronous communication systems" - "CRC_4_INTERLAKEN": "4-bit CRC used in Interlaken high-speed serial communication protocol" - "CRC_5_EPC_C1G2": "5-bit CRC used in EPC Gen 2 RFID (Radio-Frequency Identification) standard" - "CRC_5_G_704": "5-bit CRC variant in ITU-T G.704 telecommunication standard" - "CRC_5_USB": "5-bit CRC used in USB communication for detecting transmission errors" - "CRC_6_CDMA2000_A": "6-bit CRC variant used in CDMA2000 network protocols" - "CRC_6_CDMA2000_B": "Alternative 6-bit CRC variant for CDMA2000 network protocols" - "CRC_6_DARC": "6-bit CRC used in DARC (Digital Audio Radio Channel) communication" - "CRC_6_GSM": "6-bit CRC variant used in GSM telecommunications" - "CRC_6_G_704": "6-bit CRC specified in ITU-T G.704 for synchronous communication" - "CRC_7_MMC": "7-bit CRC used in MultiMediaCard (MMC) storage systems for error detection" - "CRC_7_ROHC": "7-bit CRC used in Robust Header Compression (ROHC) protocol" - "CRC_7_UMTS": "7-bit CRC used in UMTS (Universal Mobile Telecommunications System)" - "CRC_8_AUTOSAR": "8-bit CRC used in AUTOSAR (Automotive Open System Architecture) standard" - "CRC_8_BLUETOOTH": "8-bit CRC polynomial used in Bluetooth communication protocols" - "CRC_8_CDMA2000": "8-bit CRC used in CDMA2000 cellular communication standard" - "CRC_8_DARC": "8-bit CRC used in DARC (Digital Audio Radio Channel) communication" - "CRC_8_DVB_S2": "8-bit CRC used in DVB-S2 (Digital Video Broadcasting Satellite Second Generation)" - "CRC_8_GSM_A": "8-bit CRC variant A used in GSM telecommunications" - "CRC_8_GSM_B": "8-bit CRC variant B used in GSM telecommunications" - "CRC_8_HITAG": "8-bit CRC used in Hitag RFID and transponder systems" - "CRC_8_I_432_1": "8-bit CRC specified in IEEE 1432.1 standard" - 
"CRC_8_I_CODE": "8-bit CRC used in I-CODE RFID systems" - "CRC_8_LTE": "8-bit CRC used in LTE (Long-Term Evolution) cellular networks" - "CRC_8_MAXIM_DOW": "8-bit CRC used by Maxim/Dallas Semiconductor for 1-Wire and iButton devices" - "CRC_8_MIFARE_MAD": "8-bit CRC used in MIFARE MAD (Multiple Application Directory) protocol" - "CRC_8_NRSC_5": "8-bit CRC used in NRSC-5 digital radio broadcasting standard" - "CRC_8_OPENSAFETY": "8-bit CRC used in OpenSAFETY industrial communication protocol" - "CRC_8_ROHC": "8-bit CRC used in Robust Header Compression (ROHC) protocol" - "CRC_8_SAE_J1850": "8-bit CRC used in SAE J1850 automotive communication protocol" - "CRC_8_SMBUS": "8-bit CRC used in System Management Bus (SMBus) communication" - "CRC_8_TECH_3250": "8-bit CRC used in SMPTE (Society of Motion Picture and Television Engineers) standard" - "CRC_8_WCDMA": "8-bit CRC used in WCDMA (Wideband Code Division Multiple Access) networks" - "CRC_10_ATM": "10-bit CRC used in ATM (Asynchronous Transfer Mode) cell headers" - "CRC_10_CDMA2000": "10-bit CRC used in CDMA2000 cellular communication standard" - "CRC_10_GSM": "10-bit CRC variant used in GSM telecommunications" - "CRC_11_FLEXRAY": "11-bit CRC used in FlexRay automotive communication protocol" - "CRC_11_UMTS": "11-bit CRC used in UMTS (Universal Mobile Telecommunications System)" - "CRC_12_CDMA2000": "12-bit CRC used in CDMA2000 cellular communication standard" - "CRC_12_DECT": "12-bit CRC used in DECT (Digital Enhanced Cordless Telecommunications) standards" - "CRC_12_GSM": "12-bit CRC variant used in GSM telecommunications" - "CRC_12_UMTS": "12-bit CRC used in UMTS (Universal Mobile Telecommunications System)" - "CRC_13_BBC": "13-bit CRC used in BBC (British Broadcasting Corporation) digital transmission" - "CRC_14_DARC": "14-bit CRC used in DARC (Digital Audio Radio Channel) communication" - "CRC_14_GSM": "14-bit CRC variant used in GSM telecommunications" - "CRC_15_CAN": "15-bit CRC used in CAN (Controller Area 
Network) automotive communication" - "CRC_15_MPT1327": "15-bit CRC used in MPT 1327 radio trunking system" - "CRC_16_ARC": "16-bit CRC used in ARC (Adaptive Routing Code) communication" - "CRC_16_CDMA2000": "16-bit CRC used in CDMA2000 cellular communication standard" - "CRC_16_CMS": "16-bit CRC used in Content Management Systems for data integrity" - "CRC_16_DDS_110": "16-bit CRC used in DDS (Digital Data Storage) standard" - "CRC_16_DECT_R": "16-bit CRC variant R used in DECT communication" - "CRC_16_DECT_X": "16-bit CRC variant X used in DECT communication" - "CRC_16_DNP": "16-bit CRC used in DNP3 (Distributed Network Protocol) for utilities" - "CRC_16_EN_13757": "16-bit CRC specified in EN 13757 for meter communication" - "CRC_16_GENIBUS": "16-bit CRC used in GENIBUS communication protocol" - "CRC_16_GSM": "16-bit CRC variant used in GSM telecommunications" - "CRC_16_IBM_3740": "16-bit CRC used in IBM 3740 data integrity checks" - "CRC_16_IBM_SDLC": "16-bit CRC used in IBM SDLC (Synchronous Data Link Control)" - "CRC_16_ISO_IEC_14443_3_A": "16-bit CRC used in ISO/IEC 14443-3 Type A contactless smart cards" - "CRC_16_KERMIT": "16-bit CRC used in Kermit file transfer protocol" - "CRC_16_LJ1200": "16-bit CRC used in LJ1200 communication system" - "CRC_16_M17": "16-bit CRC used in M17 digital radio communication" - "CRC_16_MAXIM_DOW": "16-bit CRC used by Maxim/Dallas Semiconductor for data integrity" - "CRC_16_MCRF4XX": "16-bit CRC used in MCRF4XX RFID systems" - "CRC_16_MODBUS": "16-bit CRC used in Modbus communication protocol for error detection" - "CRC_16_NRSC_5": "16-bit CRC used in NRSC-5 digital radio broadcasting standard" - "CRC_16_OPENSAFETY_A": "16-bit CRC variant A in OpenSAFETY industrial communication" - "CRC_16_OPENSAFETY_B": "16-bit CRC variant B in OpenSAFETY industrial communication" - "CRC_16_PROFIBUS": "16-bit CRC used in PROFIBUS industrial communication protocol" - "CRC_16_RIELLO": "16-bit CRC used in Riello UPS communication" - 
"CRC_16_SPI_FUJITSU": "16-bit CRC used in Fujitsu SPI (Serial Peripheral Interface) communication" - "CRC_16_T10_DIF": "16-bit CRC used in T10 DIF (Data Integrity Field) standard" - "CRC_16_TELEDISK": "16-bit CRC used in Teledisk disk image format" - "CRC_16_TMS37157": "16-bit CRC used in TMS37157 microcontroller communication" - "CRC_16_UMTS": "16-bit CRC used in UMTS (Universal Mobile Telecommunications System)" - "CRC_16_USB": "16-bit CRC used in USB communication for error detection" - "CRC_16_XMODEM": "16-bit CRC used in XMODEM file transfer protocol" - "CRC_17_CAN_FD": "17-bit CRC used in CAN FD (Flexible Data-Rate) automotive communication protocol" - "CRC_21_CAN_FD": "21-bit CRC variant used in CAN FD (Flexible Data-Rate) automotive communication" - "CRC_24_BLE": "24-bit CRC used in Bluetooth Low Energy (BLE) packet error checking" - "CRC_24_FLEXRAY_A": "24-bit CRC variant A used in FlexRay automotive communication protocol" - "CRC_24_FLEXRAY_B": "24-bit CRC variant B used in FlexRay automotive communication protocol" - "CRC_24_INTERLAKEN": "24-bit CRC used in Interlaken high-speed serial communication protocol" - "CRC_24_LTE_A": "24-bit CRC variant A used in LTE (Long-Term Evolution) cellular networks" - "CRC_24_LTE_B": "24-bit CRC variant B used in LTE (Long-Term Evolution) cellular networks" - "CRC_24_OPENPGP": "24-bit CRC used in OpenPGP (Pretty Good Privacy) for data integrity" - "CRC_24_OS_9": "24-bit CRC used in OS-9 operating system for error detection" - "CRC_30_CDMA": "30-bit CRC used in CDMA (Code Division Multiple Access) communication standard" - "CRC_31_PHILIPS": "31-bit CRC used in Philips communication protocols" - "CRC_32_AIXM": "32-bit CRC used in Aeronautical Information Exchange Model (AIXM)" - "CRC_32_AUTOSAR": "32-bit CRC used in AUTOSAR (Automotive Open System Architecture) standard" - "CRC_32_BASE91_D": "32-bit CRC variant used in Base91 data encoding" - "CRC_32_BZIP2": "32-bit CRC used in bzip2 compression algorithm" - 
"CRC_32_CD_ROM_EDC": "32-bit CRC used for Error Detection Code in CD-ROM systems" - "CRC_32_CKSUM": "32-bit CRC used in UNIX cksum command for file integrity" - "CRC_32_ISCSI": "32-bit CRC used in iSCSI (Internet Small Computer Systems Interface)" - "CRC_32_ISO_HDLC": "32-bit CRC used in ISO HDLC (High-Level Data Link Control)" - "CRC_32_JAMCRC": "32-bit CRC variant used in JAM error detection" - "CRC_32_MEF": "32-bit CRC used in Metro Ethernet Forum (MEF) standards" - "CRC_32_MPEG_2": "32-bit CRC used in MPEG-2 transport streams for error detection" - "CRC_32_XFER": "32-bit CRC used in data transfer protocols" - "CRC_40_GSM": "40-bit CRC variant used in GSM telecommunications" - "CRC_64_ECMA_182": "64-bit CRC specified in ECMA-182 standard" - "CRC_64_GO_ISO": "64-bit CRC used in Go programming language and ISO standards" - "CRC_64_MS": "64-bit CRC variant used in Microsoft systems" - "CRC_64_REDIS": "64-bit CRC used in Redis key-value data store" - "CRC_64_WE": "64-bit CRC variant for wide-area error detection" - "CRC_64_XZ": "64-bit CRC used in the XZ compression format for integrity verification" - "CRC_82_DARC": "82-bit CRC used in DARC (Digital Audio Radio Channel) communication" - } - required: false - default: "CRC_32_ISO_HDLC" - type: ["string"] - }, - ] - internal_failure_reasons: [ - "`value` is not a string.", - "`algorithm` is not a supported algorithm.", - ] - return: types: ["string"] - - examples: [ - { - title: "Create CRC checksum using the default algorithm" - source: #""" - crc("foo") - """# - return: "2356372769" - }, - { - title: "Create CRC checksum using the CRC_32_CKSUM algorithm" - source: #""" - crc("foo", algorithm: "CRC_32_CKSUM") - """# - return: "4271552933" - }, - ] -} diff --git a/website/cue/reference/remap/functions/decode_base16.cue b/website/cue/reference/remap/functions/decode_base16.cue deleted file mode 100644 index 12cb4d4959a9e..0000000000000 --- a/website/cue/reference/remap/functions/decode_base16.cue +++ /dev/null @@ 
-1,31 +0,0 @@ -package metadata - -remap: functions: decode_base16: { - category: "Codec" - description: """ - Decodes the `value` (a [Base16](\(urls.base16)) string) into its original string. - """ - - arguments: [ - { - name: "value" - description: "The [Base16](\(urls.base16)) data to decode." - required: true - type: ["string"] - }, - ] - internal_failure_reasons: [ - "`value` isn't a valid encoded Base16 string.", - ] - return: types: ["string"] - - examples: [ - { - title: "Decode Base16 data" - source: """ - decode_base16!("796f752068617665207375636365737366756c6c79206465636f646564206d65") - """ - return: "you have successfully decoded me" - }, - ] -} diff --git a/website/cue/reference/remap/functions/decode_base64.cue b/website/cue/reference/remap/functions/decode_base64.cue deleted file mode 100644 index 1ba2efbef296c..0000000000000 --- a/website/cue/reference/remap/functions/decode_base64.cue +++ /dev/null @@ -1,49 +0,0 @@ -package metadata - -remap: functions: decode_base64: { - category: "Codec" - description: """ - Decodes the `value` (a [Base64](\(urls.base64)) string) into its original string. - """ - - arguments: [ - { - name: "value" - description: "The [Base64](\(urls.base64)) data to decode." - required: true - type: ["string"] - }, - { - name: "charset" - description: "The character set to use when decoding the data." - required: false - type: ["string"] - default: "standard" - enum: { - standard: "[Standard](\(urls.base64_standard)) Base64 format." - url_safe: "Modified Base64 for [URL variants](\(urls.base64_url_safe))." 
- } - }, - ] - internal_failure_reasons: [ - "`value` isn't a valid encoded Base64 string.", - ] - return: types: ["string"] - - examples: [ - { - title: "Decode Base64 data (default)" - source: """ - decode_base64!("eW91IGhhdmUgc3VjY2Vzc2Z1bGx5IGRlY29kZWQgbWU=") - """ - return: "you have successfully decoded me" - }, - { - title: "Decode Base64 data (URL safe)" - source: """ - decode_base64!("eW91IGNhbid0IG1ha2UgeW91ciBoZWFydCBmZWVsIHNvbWV0aGluZyBpdCB3b24ndA==", charset: "url_safe") - """ - return: "you can't make your heart feel something it won't" - }, - ] -} diff --git a/website/cue/reference/remap/functions/decode_charset.cue b/website/cue/reference/remap/functions/decode_charset.cue deleted file mode 100644 index 8d1d35fc90510..0000000000000 --- a/website/cue/reference/remap/functions/decode_charset.cue +++ /dev/null @@ -1,52 +0,0 @@ -package metadata - -remap: functions: decode_charset: { - category: "Codec" - description: """ - Decodes the `value` (a non-UTF8 string) to a UTF8 string using the specified [character set](\(urls.charset_standard)). - """ - - arguments: [ - { - name: "value" - description: "The non-UTF8 string to decode." - required: true - type: ["string"] - }, - { - name: "from_charset" - description: "The [character set](\(urls.charset_standard)) to use when decoding the data." 
- required: true - type: ["string"] - - }, - ] - internal_failure_reasons: [ - "`from_charset` isn't a valid [character set](\(urls.charset_standard)).", - ] - return: types: ["string"] - - examples: [ - { - title: "Decode EUC-KR string" - source: """ - decode_charset!(decode_base64!("vsiz58fPvLy/5A=="), "euc-kr") - """ - return: "안녕하세요" - }, - { - title: "Decode EUC-JP string" - source: """ - decode_charset!(decode_base64!("pLOk86TLpMGkzw=="), "euc-jp") - """ - return: "こんにちは" - }, - { - title: "Decode GB2312 string" - source: """ - decode_charset!(decode_base64!("xOO6ww=="), "gb2312") - """ - return: "你好" - }, - ] -} diff --git a/website/cue/reference/remap/functions/decode_gzip.cue b/website/cue/reference/remap/functions/decode_gzip.cue deleted file mode 100644 index 099a3e53a0400..0000000000000 --- a/website/cue/reference/remap/functions/decode_gzip.cue +++ /dev/null @@ -1,32 +0,0 @@ -package metadata - -remap: functions: decode_gzip: { - category: "Codec" - description: """ - Decodes the `value` (a [Gzip](\(urls.gzip)) string) into its original string. - """ - - arguments: [ - { - name: "value" - description: "The [Gzip](\(urls.gzip)) data to decode." 
- required: true - type: ["string"] - }, - ] - internal_failure_reasons: [ - "`value` isn't a valid encoded Gzip string.", - ] - return: types: ["string"] - - examples: [ - { - title: "Decode Gzip data" - source: #""" - encoded_text = decode_base64!("H4sIAHEAymMAA6vML1XISCxLVSguTU5OLS5OK83JqVRISU3OT0lNUchNBQD7BGDaIAAAAA==") - decode_gzip!(encoded_text) - """# - return: "you have successfully decoded me" - }, - ] -} diff --git a/website/cue/reference/remap/functions/decode_lz4.cue b/website/cue/reference/remap/functions/decode_lz4.cue deleted file mode 100644 index a77cd38f0ffb6..0000000000000 --- a/website/cue/reference/remap/functions/decode_lz4.cue +++ /dev/null @@ -1,59 +0,0 @@ -package metadata - -remap: functions: decode_lz4: { - category: "Codec" - description: """ - Decodes the `value` (an lz4 string) into its original string. `buf_size` is the size of the buffer to decode into, this must be equal to or larger than the uncompressed size. - If `prepended_size` is set to `true`, it expects the original uncompressed size to be prepended to the compressed data. - `prepended_size` is useful for some implementations of lz4 that require the original size to be known before decoding. - """ - - arguments: [ - { - name: "value" - description: "The lz4 block data to decode." - required: true - type: ["string"] - }, - { - name: "buf_size" - description: "The size of the buffer to decode into, this must be equal to or larger than the uncompressed size." - required: false - default: 1024 * 1024 // 1 MiB - type: ["integer"] - }, - { - name: "prepended_size" - description: "Some implementations of lz4 require the original uncompressed size to be prepended to the compressed data." 
- required: false - default: false - type: ["boolean"] - }, - ] - internal_failure_reasons: [ - "`value` unable to decode value with lz4 frame decoder.", - "`value` unable to decode value with lz4 block decoder.", - "`value` unable to decode because the output is too large for the buffer.", - "`value` unable to decode because the prepended size is not a valid integer.", - ] - return: types: ["string"] - - examples: [ - { - title: "Decode Lz4 data with prepended size." - source: #""" - encoded_text = decode_base64!("LAAAAPAdVGhlIHF1aWNrIGJyb3duIGZveCBqdW1wcyBvdmVyIDEzIGxhenkgZG9ncy4=") - decode_lz4!(encoded_text, prepended_size: true) - """# - return: "The quick brown fox jumps over 13 lazy dogs." - }, - { - title: "Decode Lz4 data without prepended size." - source: #""" - encoded_text = decode_base64!("8B1UaGUgcXVpY2sgYnJvd24gZm94IGp1bXBzIG92ZXIgMTMgbGF6eSBkb2dzLg==") - decode_lz4!(encoded_text) - """# - return: "The quick brown fox jumps over 13 lazy dogs." - }, - ] -} diff --git a/website/cue/reference/remap/functions/decode_mime_q.cue b/website/cue/reference/remap/functions/decode_mime_q.cue deleted file mode 100644 index 31b725df90f1c..0000000000000 --- a/website/cue/reference/remap/functions/decode_mime_q.cue +++ /dev/null @@ -1,45 +0,0 @@ -package metadata - -remap: functions: decode_mime_q: { - category: "Codec" - description: """ - Replaces q-encoded or base64-encoded [encoded-word](\(urls.encoded_word)) substrings in the `value` with their original string. - """ - - arguments: [ - { - name: "value" - description: "The string with [encoded-words](\(urls.encoded_word)) to decode." - required: true - type: ["string"] - }, - ] - internal_failure_reasons: [ - "`value` has invalid encoded [encoded-word](\(urls.encoded_word)) string.", - ] - return: types: ["string"] - - examples: [ - { - title: "Decode single encoded-word" - source: """ - decode_mime_q!("=?utf-8?b?SGVsbG8sIFdvcmxkIQ==?=") - """ - return: "Hello, World!" 
- }, - { - title: "Embedded" - source: """ - decode_mime_q!("From: =?utf-8?b?SGVsbG8sIFdvcmxkIQ==?= <=?utf-8?q?hello=5Fworld=40example=2ecom?=>") - """ - return: "From: Hello, World! " - }, - { - title: "Without charset" - source: """ - decode_mime_q!("?b?SGVsbG8sIFdvcmxkIQ==") - """ - return: "Hello, World!" - }, - ] -} diff --git a/website/cue/reference/remap/functions/decode_percent.cue b/website/cue/reference/remap/functions/decode_percent.cue deleted file mode 100644 index 9c355792e1676..0000000000000 --- a/website/cue/reference/remap/functions/decode_percent.cue +++ /dev/null @@ -1,29 +0,0 @@ -package metadata - -remap: functions: decode_percent: { - category: "Codec" - description: """ - Decodes a [percent-encoded](\(urls.percent_encoded_bytes)) `value` like a URL. - """ - - arguments: [ - { - name: "value" - description: "The string to decode." - required: true - type: ["string"] - }, - ] - internal_failure_reasons: [] - return: types: ["string"] - - examples: [ - { - title: "Percent decode a value" - source: """ - decode_percent("foo%20bar%3F") - """ - return: "foo bar?" - }, - ] -} diff --git a/website/cue/reference/remap/functions/decode_punycode.cue b/website/cue/reference/remap/functions/decode_punycode.cue deleted file mode 100644 index 89491b4971877..0000000000000 --- a/website/cue/reference/remap/functions/decode_punycode.cue +++ /dev/null @@ -1,52 +0,0 @@ -package metadata - -remap: functions: decode_punycode: { - category: "Codec" - description: """ - Decodes a [punycode](\(urls.punycode)) encoded `value`, such as an internationalized domain name ([IDN](\(urls.idn))). This function assumes that the value passed is meant to be used in IDN context and that it is either a domain name or a part of it. - """ - - arguments: [ - { - name: "value" - description: "The string to decode." - required: true - type: ["string"] - }, - { - name: "validate" - description: "If enabled, checks if the input string is a valid domain name." 
- required: false - type: ["boolean"] - default: true - }, - ] - internal_failure_reasons: [ - "`value` is not valid `punycode`", - ] - return: types: ["string"] - - examples: [ - { - title: "Decode a punycode encoded internationalized domain name" - source: """ - decode_punycode!("www.xn--caf-dma.com") - """ - return: "www.café.com" - }, - { - title: "Decode an ASCII only string" - source: """ - decode_punycode!("www.cafe.com") - """ - return: "www.cafe.com" - }, - { - title: "Ignore validation" - source: """ - decode_punycode!("xn--8hbb.xn--fiba.xn--8hbf.xn--eib.", validate: false) - """ - return: "١٠.٦٦.٣٠.٥." - }, - ] -} diff --git a/website/cue/reference/remap/functions/decode_snappy.cue b/website/cue/reference/remap/functions/decode_snappy.cue deleted file mode 100644 index 473613297267e..0000000000000 --- a/website/cue/reference/remap/functions/decode_snappy.cue +++ /dev/null @@ -1,32 +0,0 @@ -package metadata - -remap: functions: decode_snappy: { - category: "Codec" - description: """ - Decodes the `value` (a Snappy string) into its original string. - """ - - arguments: [ - { - name: "value" - description: "The Snappy data to decode." - required: true - type: ["string"] - }, - ] - internal_failure_reasons: [ - "`value` isn't a valid encoded Snappy string.", - ] - return: types: ["string"] - - examples: [ - { - title: "Decode Snappy data" - source: #""" - encoded_text = decode_base64!("LKxUaGUgcXVpY2sgYnJvd24gZm94IGp1bXBzIG92ZXIgMTMgbGF6eSBkb2dzLg==") - decode_snappy!(encoded_text) - """# - return: "The quick brown fox jumps over 13 lazy dogs." 
- }, - ] -} diff --git a/website/cue/reference/remap/functions/decode_zlib.cue b/website/cue/reference/remap/functions/decode_zlib.cue deleted file mode 100644 index 2fe4f4aa5a1d7..0000000000000 --- a/website/cue/reference/remap/functions/decode_zlib.cue +++ /dev/null @@ -1,32 +0,0 @@ -package metadata - -remap: functions: decode_zlib: { - category: "Codec" - description: """ - Decodes the `value` (a [Zlib](\(urls.zlib)) string) into its original string. - """ - - arguments: [ - { - name: "value" - description: "The [Zlib](\(urls.zlib)) data to decode." - required: true - type: ["string"] - }, - ] - internal_failure_reasons: [ - "`value` isn't a valid encoded Zlib string.", - ] - return: types: ["string"] - - examples: [ - { - title: "Decode Zlib data" - source: #""" - encoded_text = decode_base64!("eJwNy4ENwCAIBMCNXIlQ/KqplUSgCdvXAS41qPMHshCB2R1zJlWIVlR6UURX2+wx2YcuK3kAb9C1wd6dn7Fa+QH9gRxr") - decode_zlib!(encoded_text) - """# - return: "you_have_successfully_decoded_me.congratulations.you_are_breathtaking." - }, - ] -} diff --git a/website/cue/reference/remap/functions/decode_zstd.cue b/website/cue/reference/remap/functions/decode_zstd.cue deleted file mode 100644 index 8b20e55e81337..0000000000000 --- a/website/cue/reference/remap/functions/decode_zstd.cue +++ /dev/null @@ -1,32 +0,0 @@ -package metadata - -remap: functions: decode_zstd: { - category: "Codec" - description: """ - Decodes the `value` (a [Zstandard](\(urls.zstd)) string) into its original string. - """ - - arguments: [ - { - name: "value" - description: "The [Zstandard](\(urls.zstd)) data to decode." 
- required: true - type: ["string"] - }, - ] - internal_failure_reasons: [ - "`value` isn't a valid encoded Zstd string.", - ] - return: types: ["string"] - - examples: [ - { - title: "Decode Zstd data" - source: #""" - encoded_text = decode_base64!("KLUv/QBY/QEAYsQOFKClbQBedqXsb96EWDax/f/F/z+gNU4ZTInaUeAj82KqPFjUzKqhcfDqAIsLvAsnY1bI/N2mHzDixRQA") - decode_zstd!(encoded_text) - """# - return: "you_have_successfully_decoded_me.congratulations.you_are_breathtaking." - }, - ] -} diff --git a/website/cue/reference/remap/functions/decrypt.cue b/website/cue/reference/remap/functions/decrypt.cue deleted file mode 100644 index 2f53615d721ee..0000000000000 --- a/website/cue/reference/remap/functions/decrypt.cue +++ /dev/null @@ -1,93 +0,0 @@ -package metadata - -remap: functions: decrypt: { - category: "Cryptography" - description: """ - Decrypts a string with a symmetric encryption algorithm. - - Supported Algorithms: - - * AES-256-CFB (key = 32 bytes, iv = 16 bytes) - * AES-192-CFB (key = 24 bytes, iv = 16 bytes) - * AES-128-CFB (key = 16 bytes, iv = 16 bytes) - * AES-256-OFB (key = 32 bytes, iv = 16 bytes) - * AES-192-OFB (key = 24 bytes, iv = 16 bytes) - * AES-128-OFB (key = 16 bytes, iv = 16 bytes) - * AES-128-SIV (key = 32 bytes, iv = 16 bytes) - * AES-256-SIV (key = 64 bytes, iv = 16 bytes) - * Deprecated - AES-256-CTR (key = 32 bytes, iv = 16 bytes) - * Deprecated - AES-192-CTR (key = 24 bytes, iv = 16 bytes) - * Deprecated - AES-128-CTR (key = 16 bytes, iv = 16 bytes) - * AES-256-CTR-LE (key = 32 bytes, iv = 16 bytes) - * AES-192-CTR-LE (key = 24 bytes, iv = 16 bytes) - * AES-128-CTR-LE (key = 16 bytes, iv = 16 bytes) - * AES-256-CTR-BE (key = 32 bytes, iv = 16 bytes) - * AES-192-CTR-BE (key = 24 bytes, iv = 16 bytes) - * AES-128-CTR-BE (key = 16 bytes, iv = 16 bytes) - * AES-256-CBC-PKCS7 (key = 32 bytes, iv = 16 bytes) - * AES-192-CBC-PKCS7 (key = 24 bytes, iv = 16 bytes) - * AES-128-CBC-PKCS7 (key = 16 bytes, iv = 16 bytes) - * AES-256-CBC-ANSIX923 (key = 32 
bytes, iv = 16 bytes) - * AES-192-CBC-ANSIX923 (key = 24 bytes, iv = 16 bytes) - * AES-128-CBC-ANSIX923 (key = 16 bytes, iv = 16 bytes) - * AES-256-CBC-ISO7816 (key = 32 bytes, iv = 16 bytes) - * AES-192-CBC-ISO7816 (key = 24 bytes, iv = 16 bytes) - * AES-128-CBC-ISO7816 (key = 16 bytes, iv = 16 bytes) - * AES-256-CBC-ISO10126 (key = 32 bytes, iv = 16 bytes) - * AES-192-CBC-ISO10126 (key = 24 bytes, iv = 16 bytes) - * AES-128-CBC-ISO10126 (key = 16 bytes, iv = 16 bytes) - * CHACHA20-POLY1305 (key = 32 bytes, iv = 12 bytes) - * XCHACHA20-POLY1305 (key = 32 bytes, iv = 24 bytes) - * XSALSA20-POLY1305 (key = 32 bytes, iv = 24 bytes) - """ - - arguments: [ - { - name: "ciphertext" - description: "The string in raw bytes (not encoded) to decrypt." - required: true - type: ["string"] - }, - { - name: "algorithm" - description: "The algorithm to use." - required: true - type: ["string"] - }, - { - name: "key" - description: "The key in raw bytes (not encoded) for decryption. The length must match the algorithm requested." - required: true - type: ["string"] - }, - { - name: "iv" - description: #""" - The IV in raw bytes (not encoded) for decryption. The length must match the algorithm requested. - A new IV should be generated for every message. You can use `random_bytes` to generate a cryptographically secure random value. - The value should match the one used during encryption. 
- """# - required: true - type: ["string"] - }, - ] - internal_failure_reasons: [ - "`algorithm` is not a supported algorithm.", - "`key` length does not match the key size required for the algorithm specified.", - "`iv` length does not match the `iv` size required for the algorithm specified.", - ] - return: types: ["string"] - - examples: [ - { - title: "Decrypt value" - source: #""" - ciphertext = decode_base64!("5fLGcu1VHdzsPcGNDio7asLqE1P43QrVfPfmP4i4zOU=") - iv = decode_base64!("fVEIRkIiczCRWNxaarsyxA==") - key = "16_byte_keyxxxxx" - decrypt!(ciphertext, "AES-128-CBC-PKCS7", key, iv: iv) - """# - return: "super_secret_message" - }, - ] -} diff --git a/website/cue/reference/remap/functions/decrypt_ip.cue b/website/cue/reference/remap/functions/decrypt_ip.cue deleted file mode 100644 index 8bc782c68c7ef..0000000000000 --- a/website/cue/reference/remap/functions/decrypt_ip.cue +++ /dev/null @@ -1,93 +0,0 @@ -package metadata - -remap: functions: decrypt_ip: { - category: "IP" - description: """ - Decrypts an IP address that was previously encrypted, restoring the original IP address. - - Supported Modes: - - * AES128 - Decrypts an IP address that was scrambled using AES-128 encryption. Can transform between IPv4 and IPv6. - * PFX (Prefix-preserving) - Decrypts an IP address that was encrypted with prefix-preserving mode, where network hierarchy was maintained. - """ - notices: [ - """ - The `aes128` mode implements the `ipcrypt-deterministic` algorithm from the IPCrypt specification, while the `pfx` mode implements the `ipcrypt-pfx` algorithm. This function reverses the encryption performed by `encrypt_ip` - the same key and algorithm that were used for encryption must be used for decryption. - """, - ] - - arguments: [ - { - name: "ip" - description: "The encrypted IP address to decrypt (v4 or v6)." - required: true - type: ["string"] - }, - { - name: "key" - description: "The decryption key in raw bytes (not encoded). 
Must be the same key that was used for encryption. For AES128 mode, the key must be exactly 16 bytes. For PFX mode, the key must be exactly 32 bytes." - required: true - type: ["string"] - }, - { - name: "mode" - description: "The decryption mode to use. Must match the mode used for encryption: either `aes128` or `pfx`." - required: true - type: ["string"] - }, - ] - internal_failure_reasons: [ - "`ip` is not a valid IP address.", - "`mode` is not a supported mode (must be `aes128` or `pfx`).", - "`key` length does not match the requirements for the specified mode (16 bytes for `aes128`, 32 bytes for `pfx`).", - ] - return: types: ["string"] - - examples: [ - { - title: "Decrypt IPv4 address with AES128" - source: #""" - decrypted_ip = decrypt_ip!("72b9:a747:f2e9:72af:76ca:5866:6dcf:c3b0", "sixteen byte key", "aes128") - decrypted_ip - """# - return: "192.168.1.1" - }, - { - title: "Decrypt IPv6 address with AES128" - source: #""" - decrypted_ip = decrypt_ip!("c0e6:eb35:6887:f554:4c65:8ace:17ca:6c6a", "sixteen byte key", "aes128") - decrypted_ip - """# - return: "2001:db8::1" - }, - { - title: "Decrypt IPv4 address with prefix-preserving mode" - source: #""" - decrypted_ip = decrypt_ip!("33.245.248.61", "thirty-two bytes key for pfx use", "pfx") - decrypted_ip - """# - return: "192.168.1.1" - }, - { - title: "Decrypt IPv6 address with prefix-preserving mode" - source: #""" - decrypted_ip = decrypt_ip!("88bd:d2bf:8865:8c4d:84b:44f6:6077:72c9", "thirty-two bytes key for ipv6pfx", "pfx") - decrypted_ip - """# - return: "2001:db8::1" - }, - { - title: "Round-trip encryption and decryption" - source: #""" - original_ip = "192.168.1.100" - key = "sixteen byte key" - - encrypted = encrypt_ip!(original_ip, key, "aes128") - decrypted = decrypt_ip!(encrypted, key, "aes128") - - decrypted == original_ip - """# - return: true - }, - ] -} diff --git a/website/cue/reference/remap/functions/del.cue b/website/cue/reference/remap/functions/del.cue deleted file mode 100644 index 
5a825aa6e40f7..0000000000000 --- a/website/cue/reference/remap/functions/del.cue +++ /dev/null @@ -1,62 +0,0 @@ -package metadata - -remap: functions: del: { - category: "Path" - description: """ - Removes the field specified by the static `path` from the target. - - For dynamic path deletion, see the `remove` function. - """ - - pure: false - - arguments: [ - { - name: "path" - description: "The path of the field to delete." - required: true - type: ["path"] - }, - { - name: "compact" - description: """ - After deletion, if `compact` is `true` and there is an empty object or array left, - the empty object or array is also removed, cascading up to the root. This only - applies to the path being deleted, and any parent paths. - """ - required: false - default: false - type: ["boolean"] - }, - ] - internal_failure_reasons: [] - notices: [ - """ - The `del` function _modifies the current event in place_ and returns the value of the deleted field. - """, - ] - return: { - types: ["any", "null"] - rules: [ - "Returns the value of the field being deleted. Returns `null` if the field doesn't exist.", - ] - } - - examples: [ - { - title: "Delete a field" - input: log: { - field1: 1 - field2: 2 - } - source: "del(.field1)" - output: log: field2: 2 - }, - { - title: "Rename a field" - input: log: old_field: "please rename me" - source: ".new_field = del(.old_field)" - output: log: new_field: "please rename me" - }, - ] -} diff --git a/website/cue/reference/remap/functions/dirname.cue b/website/cue/reference/remap/functions/dirname.cue deleted file mode 100644 index a8bac71de6c84..0000000000000 --- a/website/cue/reference/remap/functions/dirname.cue +++ /dev/null @@ -1,60 +0,0 @@ -package metadata - -remap: functions: dirname: { - category: "String" - description: """ - Returns the directory component of the given `path`. This is similar to the Unix `dirname` command. - The directory component is the path with the final component removed. 
- """ - - arguments: [ - { - name: "value" - description: "The path from which to extract the directory name." - required: true - type: ["string"] - }, - ] - internal_failure_reasons: [ - "`value` is not a valid string.", - ] - return: types: ["string"] - - examples: [ - { - title: "Extract dirname from file path" - source: """ - dirname!("/usr/local/bin/vrl") - """ - return: "/usr/local/bin" - }, - { - title: "Extract dirname from file path with extension" - source: """ - dirname!("/home/user/file.txt") - """ - return: "/home/user" - }, - { - title: "Extract dirname from directory path" - source: """ - dirname!("/home/user/") - """ - return: "/home" - }, - { - title: "Root directory dirname is itself" - source: """ - dirname!("/") - """ - return: "/" - }, - { - title: "Relative files have current directory as dirname" - source: """ - dirname!("file.txt") - """ - return: "." - }, - ] -} diff --git a/website/cue/reference/remap/functions/downcase.cue b/website/cue/reference/remap/functions/downcase.cue deleted file mode 100644 index 1feb707899324..0000000000000 --- a/website/cue/reference/remap/functions/downcase.cue +++ /dev/null @@ -1,30 +0,0 @@ -package metadata - -remap: functions: downcase: { - category: "String" - description: """ - Downcases the `value` string, where downcase is defined according to the - Unicode Derived Core Property Lowercase. - """ - - arguments: [ - { - name: "value" - description: "The string to convert to lowercase." - required: true - type: ["string"] - }, - ] - internal_failure_reasons: [] - return: types: ["string"] - - examples: [ - { - title: "Downcase a string" - source: #""" - downcase("Hello, World!") - """# - return: "hello, world!" 
- }, - ] -} diff --git a/website/cue/reference/remap/functions/encode_base16.cue b/website/cue/reference/remap/functions/encode_base16.cue deleted file mode 100644 index b93cbb9843113..0000000000000 --- a/website/cue/reference/remap/functions/encode_base16.cue +++ /dev/null @@ -1,29 +0,0 @@ -package metadata - -remap: functions: encode_base16: { - category: "Codec" - description: """ - Encodes the `value` to [Base16](\(urls.base16)). - """ - - arguments: [ - { - name: "value" - description: "The string to encode." - required: true - type: ["string"] - }, - ] - internal_failure_reasons: [] - return: types: ["string"] - - examples: [ - { - title: "Encode to Base16" - source: """ - encode_base16("please encode me") - """ - return: "706c6561736520656e636f6465206d65" - }, - ] -} diff --git a/website/cue/reference/remap/functions/encode_base64.cue b/website/cue/reference/remap/functions/encode_base64.cue deleted file mode 100644 index 840d8cd321337..0000000000000 --- a/website/cue/reference/remap/functions/encode_base64.cue +++ /dev/null @@ -1,61 +0,0 @@ -package metadata - -remap: functions: encode_base64: { - category: "Codec" - description: """ - Encodes the `value` to [Base64](\(urls.base64)). - """ - - arguments: [ - { - name: "value" - description: "The string to encode." - required: true - type: ["string"] - }, - { - name: "padding" - description: "Whether the Base64 output is [padded](\(urls.base64_padding))." - required: false - type: ["boolean"] - default: true - }, - { - name: "charset" - description: "The character set to use when encoding the data." - required: false - type: ["string"] - default: "standard" - enum: { - standard: "[Standard](\(urls.base64_standard)) Base64 format." - url_safe: "Modified Base64 for [URL variants](\(urls.base64_url_safe))." 
- } - }, - ] - internal_failure_reasons: [] - return: types: ["string"] - - examples: [ - { - title: "Encode to Base64 (default)" - source: """ - encode_base64("please encode me") - """ - return: "cGxlYXNlIGVuY29kZSBtZQ==" - }, - { - title: "Encode to Base64 (without padding)" - source: """ - encode_base64("please encode me, no padding though", padding: false) - """ - return: "cGxlYXNlIGVuY29kZSBtZSwgbm8gcGFkZGluZyB0aG91Z2g" - }, - { - title: "Encode to Base64 (URL safe)" - source: """ - encode_base64("please encode me, but safe for URLs", charset: "url_safe") - """ - return: "cGxlYXNlIGVuY29kZSBtZSwgYnV0IHNhZmUgZm9yIFVSTHM=" - }, - ] -} diff --git a/website/cue/reference/remap/functions/encode_charset.cue b/website/cue/reference/remap/functions/encode_charset.cue deleted file mode 100644 index 9bb98b04adafa..0000000000000 --- a/website/cue/reference/remap/functions/encode_charset.cue +++ /dev/null @@ -1,52 +0,0 @@ -package metadata - -remap: functions: encode_charset: { - category: "Codec" - description: """ - Encodes the `value` (a UTF8 string) to a non-UTF8 string using the specified [character set](\(urls.charset_standard)). - """ - - arguments: [ - { - name: "value" - description: "The UTF8 string to encode." - required: true - type: ["string"] - }, - { - name: "to_charset" - description: "The [character set](\(urls.charset_standard)) to use when encoding the data." 
- required: true - type: ["string"] - - }, - ] - internal_failure_reasons: [ - "`to_charset` isn't a valid [character set](\(urls.charset_standard)).", - ] - return: types: ["string"] - - examples: [ - { - title: "Encode UTF8 string to EUC-KR" - source: """ - encode_base64(encode_charset!("안녕하세요", "euc-kr")) - """ - return: "vsiz58fPvLy/5A==" - }, - { - title: "Encode UTF8 string to EUC-JP" - source: """ - encode_base64(encode_charset!("こんにちは", "euc-jp")) - """ - return: "pLOk86TLpMGkzw==" - }, - { - title: "Encode UTF8 string to GB2312" - source: """ - encode_base64(encode_charset!("你好", "gb2312")) - """ - return: "xOO6ww==" - }, - ] -} diff --git a/website/cue/reference/remap/functions/encode_gzip.cue b/website/cue/reference/remap/functions/encode_gzip.cue deleted file mode 100644 index a30c545dfe8ec..0000000000000 --- a/website/cue/reference/remap/functions/encode_gzip.cue +++ /dev/null @@ -1,37 +0,0 @@ -package metadata - -remap: functions: encode_gzip: { - category: "Codec" - description: """ - Encodes the `value` to [Gzip](\(urls.gzip)). - """ - - arguments: [ - { - name: "value" - description: "The string to encode." - required: true - type: ["string"] - }, - { - name: "compression_level" - description: "The default compression level." - required: false - type: ["integer"] - default: 6 - }, - ] - internal_failure_reasons: [] - return: types: ["string"] - - examples: [ - { - title: "Encode to Gzip" - source: #""" - encoded_text = encode_gzip("please encode me") - encode_base64(encoded_text) - """# - return: "H4sIAAAAAAAA/yvISU0sTlVIzUvOT0lVyE0FAI4R4vcQAAAA" - }, - ] -} diff --git a/website/cue/reference/remap/functions/encode_json.cue b/website/cue/reference/remap/functions/encode_json.cue deleted file mode 100644 index 1757905cf7ee6..0000000000000 --- a/website/cue/reference/remap/functions/encode_json.cue +++ /dev/null @@ -1,35 +0,0 @@ -package metadata - -remap: functions: encode_json: { - category: "Codec" - description: """ - Encodes the `value` to JSON. 
- """ - - arguments: [ - { - name: "value" - description: "The value to convert to a JSON string." - required: true - type: ["any"] - }, - { - name: "pretty" - description: "Whether to pretty print the JSON string or not." - required: false - type: ["boolean"] - }, - ] - internal_failure_reasons: [] - return: types: ["string"] - - examples: [ - { - title: "Encode to JSON" - source: #""" - .payload = encode_json({"hello": "world"}) - """# - return: #"{"hello":"world"}"# - }, - ] -} diff --git a/website/cue/reference/remap/functions/encode_key_value.cue b/website/cue/reference/remap/functions/encode_key_value.cue deleted file mode 100644 index 2af964dab8f6b..0000000000000 --- a/website/cue/reference/remap/functions/encode_key_value.cue +++ /dev/null @@ -1,108 +0,0 @@ -package metadata - -remap: functions: encode_key_value: { - category: "Codec" - description: """ - Encodes the `value` into key-value format with customizable delimiters. Default delimiters match - the [logfmt](\(urls.logfmt)) format. - """ - notices: [ - """ - If `fields_ordering` is specified then the function is fallible else it is infallible. - """, - ] - - arguments: [ - { - name: "value" - description: "The value to convert to a string." - required: true - type: ["object"] - }, - { - name: "fields_ordering" - description: "The ordering of fields to preserve. Any fields not in this list are listed unordered, after all ordered fields." - required: false - type: ["array"] - }, - { - name: "key_value_delimiter" - description: "The string that separates the key from the value." - required: false - default: "=" - type: ["string"] - }, - { - name: "field_delimiter" - description: "The string that separates each key-value pair." - required: false - default: " " - type: ["string"] - }, - { - name: "flatten_boolean" - description: "Whether to encode key-value with a boolean value as a standalone key if `true` and nothing if `false`." 
- required: false - type: ["boolean"] - default: false - }, - ] - internal_failure_reasons: [ - "`fields_ordering` contains a non-string element.", - ] - return: types: ["string"] - - examples: [ - { - title: "Encode with default delimiters (no ordering)" - source: """ - encode_key_value({"ts": "2021-06-05T17:20:00Z", "msg": "This is a message", "lvl": "info"}) - """ - return: #"lvl=info msg="This is a message" ts=2021-06-05T17:20:00Z"# - }, - { - title: "Encode with default delimiters (fields ordering)" - source: """ - encode_key_value!({"ts": "2021-06-05T17:20:00Z", "msg": "This is a message", "lvl": "info", "log_id": 12345}, ["ts", "lvl", "msg"]) - """ - return: #"ts=2021-06-05T17:20:00Z lvl=info msg="This is a message" log_id=12345"# - }, - { - title: "Encode with default delimiters (nested fields)" - source: """ - encode_key_value({"agent": {"name": "foo"}, "log": {"file": {"path": "my.log"}}, "event": "log"}) - """ - return: #"agent.name=foo event=log log.file.path=my.log"# - }, - { - title: "Encode with default delimiters (nested fields ordering)" - source: """ - encode_key_value!({"agent": {"name": "foo"}, "log": {"file": {"path": "my.log"}}, "event": "log"}, ["event", "log.file.path", "agent.name"]) - """ - return: #"event=log log.file.path=my.log agent.name=foo"# - }, - { - title: "Encode with custom delimiters (no ordering)" - source: """ - encode_key_value( - {"ts": "2021-06-05T17:20:00Z", "msg": "This is a message", "lvl": "info"}, - field_delimiter: ",", - key_value_delimiter: ":" - ) - """ - return: #"lvl:info,msg:"This is a message",ts:2021-06-05T17:20:00Z"# - }, - { - title: "Encode with custom delimiters and flatten boolean" - source: """ - encode_key_value( - {"ts": "2021-06-05T17:20:00Z", "msg": "This is a message", "lvl": "info", "beta": true, "dropped": false}, - field_delimiter: ",", - key_value_delimiter: ":", - flatten_boolean: true - ) - """ - return: #"beta,lvl:info,msg:"This is a message",ts:2021-06-05T17:20:00Z"# - }, - ] -} diff --git 
a/website/cue/reference/remap/functions/encode_logfmt.cue b/website/cue/reference/remap/functions/encode_logfmt.cue deleted file mode 100644 index 9fa01e746a7b4..0000000000000 --- a/website/cue/reference/remap/functions/encode_logfmt.cue +++ /dev/null @@ -1,59 +0,0 @@ -package metadata - -remap: functions: encode_logfmt: { - category: "Codec" - description: """ - Encodes the `value` to [logfmt](\(urls.logfmt)). - """ - notices: functions.encode_key_value.notices - - arguments: [ - { - name: "value" - description: "The value to convert to a logfmt string." - required: true - type: ["object"] - }, - { - name: "fields_ordering" - description: "The ordering of fields to preserve. Any fields not in this list are listed unordered, after all ordered fields." - required: false - type: ["array"] - }, - ] - internal_failure_reasons: [ - "`fields_ordering` contains a non-string element.", - ] - return: types: ["string"] - - examples: [ - { - title: "Encode to logfmt (no ordering)" - source: """ - encode_logfmt({"ts": "2021-06-05T17:20:00Z", "msg": "This is a message", "lvl": "info"}) - """ - return: #"lvl=info msg="This is a message" ts=2021-06-05T17:20:00Z"# - }, - { - title: "Encode to logfmt (fields ordering)" - source: """ - encode_logfmt!({"ts": "2021-06-05T17:20:00Z", "msg": "This is a message", "lvl": "info", "log_id": 12345}, ["ts", "lvl", "msg"]) - """ - return: #"ts=2021-06-05T17:20:00Z lvl=info msg="This is a message" log_id=12345"# - }, - { - title: "Encode to logfmt (nested fields)" - source: """ - encode_logfmt({"agent": {"name": "foo"}, "log": {"file": {"path": "my.log"}}, "event": "log"}) - """ - return: #"agent.name=foo event=log log.file.path=my.log"# - }, - { - title: "Encode to logfmt (nested fields ordering)" - source: """ - encode_logfmt!({"agent": {"name": "foo"}, "log": {"file": {"path": "my.log"}}, "event": "log"}, ["event", "log.file.path", "agent.name"]) - """ - return: #"event=log log.file.path=my.log agent.name=foo"# - }, - ] -} diff --git 
a/website/cue/reference/remap/functions/encode_lz4.cue b/website/cue/reference/remap/functions/encode_lz4.cue deleted file mode 100644 index 66ebe7e45bcd9..0000000000000 --- a/website/cue/reference/remap/functions/encode_lz4.cue +++ /dev/null @@ -1,39 +0,0 @@ -package metadata - -remap: functions: encode_lz4: { - category: "Codec" - description: """ - Encodes the `value` to [Lz4](\(urls.lz4)). This function compresses the input string into an lz4 block. - If `prepend_size` is set to `true`, it prepends the original uncompressed size to the compressed data. - This is useful for some implementations of lz4 that require the original size to be known before decoding. - """ - - arguments: [ - { - name: "value" - description: "The string to encode." - required: true - type: ["string"] - }, - { - name: "prepend_size" - description: "Whether to prepend the original size to the compressed data." - required: false - default: false - type: ["boolean"] - }, - ] - internal_failure_reasons: [] - return: types: ["string"] - - examples: [ - { - title: "Encode to Lz4" - source: #""" - encoded_text = encode_lz4!("The quick brown fox jumps over 13 lazy dogs.") - encode_base64(encoded_text) - """# - return: "LAAAAPAdVGhlIHF1aWNrIGJyb3duIGZveCBqdW1wcyBvdmVyIDEzIGxhenkgZG9ncy4=" - }, - ] -} diff --git a/website/cue/reference/remap/functions/encode_percent.cue b/website/cue/reference/remap/functions/encode_percent.cue deleted file mode 100644 index c6f9a3d61b0c7..0000000000000 --- a/website/cue/reference/remap/functions/encode_percent.cue +++ /dev/null @@ -1,54 +0,0 @@ -package metadata - -remap: functions: encode_percent: { - category: "Codec" - description: """ - Encodes a `value` with [percent encoding](\(urls.percent_encoded_bytes)) to safely be used in URLs. - """ - - arguments: [ - { - name: "value" - description: "The string to encode." - required: true - type: ["string"] - }, - { - name: "ascii_set" - description: "The ASCII set to use when encoding the data." 
- required: false - type: ["string"] - default: "NON_ALPHANUMERIC" - enum: { - NON_ALPHANUMERIC: "Encode any non-alphanumeric characters. This is the safest option." - CONTROLS: "Encode only [control characters](\(urls.percent_encoding_controls))." - FRAGMENT: "Encode only [fragment characters](\(urls.percent_encoding_fragment))" - QUERY: "Encode only [query characters](\(urls.percent_encoding_query))" - SPECIAL: "Encode only [special characters](\(urls.percent_encoding_special))" - PATH: "Encode only [path characters](\(urls.percent_encoding_path))" - USERINFO: "Encode only [userinfo characters](\(urls.percent_encoding_userinfo))" - COMPONENT: "Encode only [component characters](\(urls.percent_encoding_component))" - WWW_FORM_URLENCODED: "Encode only [`application/x-www-form-urlencoded`](\(urls.percent_encoding_www_form_urlencoded))" - } - }, - ] - internal_failure_reasons: [] - return: types: ["string"] - - examples: [ - { - title: "Percent encode all non-alphanumeric characters (default)" - source: """ - encode_percent("foo bar?") - """ - return: "foo%20bar%3F" - }, - { - title: "Percent encode only control characters" - source: """ - encode_percent("foo \tbar", ascii_set: "CONTROLS") - """ - return: "foo %09bar" - }, - ] -} diff --git a/website/cue/reference/remap/functions/encode_proto.cue b/website/cue/reference/remap/functions/encode_proto.cue deleted file mode 100644 index 253bcf01e9241..0000000000000 --- a/website/cue/reference/remap/functions/encode_proto.cue +++ /dev/null @@ -1,52 +0,0 @@ -package metadata - -remap: functions: encode_proto: { - category: "Codec" - description: """ - Encodes the `value` into a protocol buffer payload. - """ - - arguments: [ - { - name: "value" - description: "The object to convert to a protocol buffer payload." - required: true - type: ["object"] - }, - { - name: "desc_file" - description: """ - The path to the protobuf descriptor set file. Must be a literal string. - - This file is the output of protoc -o ... 
- """ - required: true - type: ["string"] - }, - { - name: "message_type" - description: """ - The name of the message type to use for serializing. - - Must be a literal string. - """ - required: true - type: ["string"] - }, - ] - internal_failure_reasons: [ - "`desc_file` file does not exist.", - "`message_type` message type does not exist in the descriptor file.", - ] - return: types: ["string"] - - examples: [ - { - title: "Encode to proto" - source: #""" - .payload = encode_base64(encode_proto!({"name": "someone", "phones": [{"number": "123456"}]}, "resources/protobuf_descriptor_set.desc", "test_protobuf.Person")) - """# - return: #"Cgdzb21lb25lIggKBjEyMzQ1Ng=="# - }, - ] -} diff --git a/website/cue/reference/remap/functions/encode_punycode.cue b/website/cue/reference/remap/functions/encode_punycode.cue deleted file mode 100644 index d5d72439f7a86..0000000000000 --- a/website/cue/reference/remap/functions/encode_punycode.cue +++ /dev/null @@ -1,59 +0,0 @@ -package metadata - -remap: functions: encode_punycode: { - category: "Codec" - description: """ - Encodes a `value` to [punycode](\(urls.punycode)). Useful for internationalized domain names ([IDN](\(urls.idn))). This function assumes that the value passed is meant to be used in IDN context and that it is either a domain name or a part of it. - """ - - arguments: [ - { - name: "value" - description: "The string to encode." - required: true - type: ["string"] - }, - { - name: "validate" - description: "Whether to validate the input string to check if it is a valid domain name." 
- required: false - type: ["boolean"] - default: true - }, - ] - internal_failure_reasons: [ - "`value` can not be encoded to `punycode`", - ] - return: types: ["string"] - - examples: [ - { - title: "Encode an internationalized domain name" - source: """ - encode_punycode!("www.café.com") - """ - return: "www.xn--caf-dma.com" - }, - { - title: "Encode an internationalized domain name with mixed case" - source: """ - encode_punycode!("www.CAFé.com") - """ - return: "www.xn--caf-dma.com" - }, - { - title: "Encode an ASCII only string" - source: """ - encode_punycode!("www.cafe.com") - """ - return: "www.cafe.com" - }, - { - title: "Ignore validation" - source: """ - encode_punycode!("xn--8hbb.xn--fiba.xn--8hbf.xn--eib.", validate: false) - """ - return: "xn--8hbb.xn--fiba.xn--8hbf.xn--eib." - }, - ] -} diff --git a/website/cue/reference/remap/functions/encode_snappy.cue b/website/cue/reference/remap/functions/encode_snappy.cue deleted file mode 100644 index f8a56c9ca653a..0000000000000 --- a/website/cue/reference/remap/functions/encode_snappy.cue +++ /dev/null @@ -1,32 +0,0 @@ -package metadata - -remap: functions: encode_snappy: { - category: "Codec" - description: """ - Encodes the `value` to Snappy. - """ - - arguments: [ - { - name: "value" - description: "The string to encode." 
- required: true - type: ["string"] - }, - ] - internal_failure_reasons: [ - "`value` cannot be encoded into a Snappy string.", - ] - return: types: ["string"] - - examples: [ - { - title: "Encode to Snappy" - source: #""" - encoded_text = encode_snappy!("The quick brown fox jumps over 13 lazy dogs.") - encode_base64(encoded_text) - """# - return: "LKxUaGUgcXVpY2sgYnJvd24gZm94IGp1bXBzIG92ZXIgMTMgbGF6eSBkb2dzLg==" - }, - ] -} diff --git a/website/cue/reference/remap/functions/encode_zlib.cue b/website/cue/reference/remap/functions/encode_zlib.cue deleted file mode 100644 index 73921d92c3a17..0000000000000 --- a/website/cue/reference/remap/functions/encode_zlib.cue +++ /dev/null @@ -1,37 +0,0 @@ -package metadata - -remap: functions: encode_zlib: { - category: "Codec" - description: """ - Encodes the `value` to [Zlib](\(urls.zlib)). - """ - - arguments: [ - { - name: "value" - description: "The string to encode." - required: true - type: ["string"] - }, - { - name: "compression_level" - description: "The default compression level." - required: false - type: ["integer"] - default: 6 - }, - ] - internal_failure_reasons: [] - return: types: ["string"] - - examples: [ - { - title: "Encode to Zlib" - source: #""" - encoded_text = encode_zlib("please encode me") - encode_base64(encoded_text) - """# - return: "eJwryElNLE5VSM1Lzk9JVchNBQA0RQX7" - }, - ] -} diff --git a/website/cue/reference/remap/functions/encode_zstd.cue b/website/cue/reference/remap/functions/encode_zstd.cue deleted file mode 100644 index fb9cf17296a78..0000000000000 --- a/website/cue/reference/remap/functions/encode_zstd.cue +++ /dev/null @@ -1,37 +0,0 @@ -package metadata - -remap: functions: encode_zstd: { - category: "Codec" - description: """ - Encodes the `value` to [Zstandard](\(urls.zstd)). - """ - - arguments: [ - { - name: "value" - description: "The string to encode." - required: true - type: ["string"] - }, - { - name: "compression_level" - description: "The default compression level." 
- required: false - type: ["integer"] - default: 3 - }, - ] - internal_failure_reasons: [] - return: types: ["string"] - - examples: [ - { - title: "Encode to Zstd" - source: #""" - encoded_text = encode_zstd("please encode me") - encode_base64(encoded_text) - """# - return: "KLUv/QBYgQAAcGxlYXNlIGVuY29kZSBtZQ==" - }, - ] -} diff --git a/website/cue/reference/remap/functions/encrypt.cue b/website/cue/reference/remap/functions/encrypt.cue deleted file mode 100644 index 0703654e9ba2e..0000000000000 --- a/website/cue/reference/remap/functions/encrypt.cue +++ /dev/null @@ -1,93 +0,0 @@ -package metadata - -remap: functions: encrypt: { - category: "Cryptography" - description: """ - Encrypts a string with a symmetric encryption algorithm. - - Supported Algorithms: - - * AES-256-CFB (key = 32 bytes, iv = 16 bytes) - * AES-192-CFB (key = 24 bytes, iv = 16 bytes) - * AES-128-CFB (key = 16 bytes, iv = 16 bytes) - * AES-256-OFB (key = 32 bytes, iv = 16 bytes) - * AES-192-OFB (key = 24 bytes, iv = 16 bytes) - * AES-128-OFB (key = 16 bytes, iv = 16 bytes) - * AES-128-SIV (key = 32 bytes, iv = 16 bytes) - * AES-256-SIV (key = 64 bytes, iv = 16 bytes) - * Deprecated - AES-256-CTR (key = 32 bytes, iv = 16 bytes) - * Deprecated - AES-192-CTR (key = 24 bytes, iv = 16 bytes) - * Deprecated - AES-128-CTR (key = 16 bytes, iv = 16 bytes) - * AES-256-CTR-LE (key = 32 bytes, iv = 16 bytes) - * AES-192-CTR-LE (key = 24 bytes, iv = 16 bytes) - * AES-128-CTR-LE (key = 16 bytes, iv = 16 bytes) - * AES-256-CTR-BE (key = 32 bytes, iv = 16 bytes) - * AES-192-CTR-BE (key = 24 bytes, iv = 16 bytes) - * AES-128-CTR-BE (key = 16 bytes, iv = 16 bytes) - * AES-256-CBC-PKCS7 (key = 32 bytes, iv = 16 bytes) - * AES-192-CBC-PKCS7 (key = 24 bytes, iv = 16 bytes) - * AES-128-CBC-PKCS7 (key = 16 bytes, iv = 16 bytes) - * AES-256-CBC-ANSIX923 (key = 32 bytes, iv = 16 bytes) - * AES-192-CBC-ANSIX923 (key = 24 bytes, iv = 16 bytes) - * AES-128-CBC-ANSIX923 (key = 16 bytes, iv = 16 bytes) - * 
AES-256-CBC-ISO7816 (key = 32 bytes, iv = 16 bytes) - * AES-192-CBC-ISO7816 (key = 24 bytes, iv = 16 bytes) - * AES-128-CBC-ISO7816 (key = 16 bytes, iv = 16 bytes) - * AES-256-CBC-ISO10126 (key = 32 bytes, iv = 16 bytes) - * AES-192-CBC-ISO10126 (key = 24 bytes, iv = 16 bytes) - * AES-128-CBC-ISO10126 (key = 16 bytes, iv = 16 bytes) - * CHACHA20-POLY1305 (key = 32 bytes, iv = 12 bytes) - * XCHACHA20-POLY1305 (key = 32 bytes, iv = 24 bytes) - * XSALSA20-POLY1305 (key = 32 bytes, iv = 24 bytes) - """ - - arguments: [ - { - name: "plaintext" - description: "The string to encrypt." - required: true - type: ["string"] - }, - { - name: "algorithm" - description: "The algorithm to use." - required: true - type: ["string"] - }, - { - name: "key" - description: "The key in raw bytes (not encoded) for encryption. The length must match the algorithm requested." - required: true - type: ["string"] - }, - { - name: "iv" - description: #""" - The IV in raw bytes (not encoded) for encryption. The length must match the algorithm requested. - A new IV should be generated for every message. You can use `random_bytes` to generate a cryptographically secure random value. 
- """# - required: true - type: ["string"] - }, - ] - internal_failure_reasons: [ - "`algorithm` is not a supported algorithm.", - "`key` length does not match the key size required for the algorithm specified.", - "`iv` length does not match the `iv` size required for the algorithm specified.", - ] - return: types: ["string"] - - examples: [ - { - title: "Encrypt value" - source: #""" - plaintext = "super secret message" - iv = "1234567890123456" # typically you would call random_bytes(16) - key = "16_byte_keyxxxxx" - encrypted_message = encrypt!(plaintext, "AES-128-CBC-PKCS7", key, iv: iv) - encode_base64(encrypted_message) - """# - return: "GBw8Mu00v0Kc38+/PvsVtGgWuUJ+ZNLgF8Opy8ohIYE=" - }, - ] -} diff --git a/website/cue/reference/remap/functions/encrypt_ip.cue b/website/cue/reference/remap/functions/encrypt_ip.cue deleted file mode 100644 index 5888f2c22055a..0000000000000 --- a/website/cue/reference/remap/functions/encrypt_ip.cue +++ /dev/null @@ -1,80 +0,0 @@ -package metadata - -remap: functions: encrypt_ip: { - category: "IP" - description: """ - Encrypts an IP address, transforming it into a different valid IP address. - - Supported Modes: - - * AES128 - Scrambles the entire IP address using AES-128 encryption. Can transform between IPv4 and IPv6. - * PFX (Prefix-preserving) - Maintains network hierarchy by ensuring that IP addresses within the same network are encrypted to addresses that also share a common network. This preserves prefix relationships while providing confidentiality. - """ - notices: [ - """ - The `aes128` mode implements the `ipcrypt-deterministic` algorithm from the IPCrypt specification, while the `pfx` mode implements the `ipcrypt-pfx` algorithm. Both modes provide deterministic encryption where the same input IP address encrypted with the same key will always produce the same encrypted output. - """, - ] - - arguments: [ - { - name: "ip" - description: "The IP address to encrypt (v4 or v6)." 
- required: true - type: ["string"] - }, - { - name: "key" - description: "The encryption key in raw bytes (not encoded). For AES128 mode, the key must be exactly 16 bytes. For PFX mode, the key must be exactly 32 bytes." - required: true - type: ["string"] - }, - { - name: "mode" - description: "The encryption mode to use. Must be either `aes128` or `pfx`." - required: true - type: ["string"] - }, - ] - internal_failure_reasons: [ - "`ip` is not a valid IP address.", - "`mode` is not a supported mode (must be `aes128` or `pfx`).", - "`key` length does not match the requirements for the specified mode (16 bytes for `aes128`, 32 bytes for `pfx`).", - ] - return: types: ["string"] - - examples: [ - { - title: "Encrypt IPv4 address with AES128" - source: #""" - encrypted_ip = encrypt_ip!("192.168.1.1", "sixteen byte key", "aes128") - encrypted_ip - """# - return: "72b9:a747:f2e9:72af:76ca:5866:6dcf:c3b0" - }, - { - title: "Encrypt IPv6 address with AES128" - source: #""" - encrypted_ip = encrypt_ip!("2001:db8::1", "sixteen byte key", "aes128") - encrypted_ip - """# - return: "c0e6:eb35:6887:f554:4c65:8ace:17ca:6c6a" - }, - { - title: "Encrypt IPv4 address with prefix-preserving mode" - source: #""" - encrypted_ip = encrypt_ip!("192.168.1.1", "thirty-two bytes key for pfx use", "pfx") - encrypted_ip - """# - return: "33.245.248.61" - }, - { - title: "Encrypt IPv6 address with prefix-preserving mode" - source: #""" - encrypted_ip = encrypt_ip!("2001:db8::1", "thirty-two bytes key for ipv6pfx", "pfx") - encrypted_ip - """# - return: "88bd:d2bf:8865:8c4d:84b:44f6:6077:72c9" - }, - ] -} diff --git a/website/cue/reference/remap/functions/ends_with.cue b/website/cue/reference/remap/functions/ends_with.cue deleted file mode 100644 index 4251fe0146ea4..0000000000000 --- a/website/cue/reference/remap/functions/ends_with.cue +++ /dev/null @@ -1,49 +0,0 @@ -package metadata - -remap: functions: ends_with: { - category: "String" - description: """ - Determines whether the `value` 
string ends with the specified `substring`. - """ - - arguments: [ - { - name: "value" - description: "The string to search." - required: true - type: ["string"] - }, - { - name: "substring" - description: "The substring with which `value` must end." - required: true - type: ["string"] - }, - { - name: "case_sensitive" - description: "Whether the match should be case sensitive." - required: false - type: ["boolean"] - default: true - }, - ] - internal_failure_reasons: [] - return: types: ["boolean"] - - examples: [ - { - title: "String ends with (case sensitive)" - source: #""" - ends_with("The Needle In The Haystack", "The Haystack") - """# - return: true - }, - { - title: "String ends with (case insensitive)" - source: #""" - ends_with("The Needle In The Haystack", "the haystack", case_sensitive: false) - """# - return: true - }, - ] -} diff --git a/website/cue/reference/remap/functions/exists.cue b/website/cue/reference/remap/functions/exists.cue deleted file mode 100644 index ec37aa5ae7558..0000000000000 --- a/website/cue/reference/remap/functions/exists.cue +++ /dev/null @@ -1,43 +0,0 @@ -package metadata - -remap: functions: exists: { - category: "Path" - description: """ - Checks whether the `path` exists for the target. - - This function distinguishes between a missing path - and a path with a `null` value. A regular path lookup, - such as `.foo`, cannot distinguish between the two cases - since it always returns `null` if the path doesn't exist. - """ - - arguments: [ - { - name: "path" - description: "The path of the field to check." 
- required: true - type: ["path"] - }, - ] - internal_failure_reasons: [] - return: types: ["boolean"] - - examples: [ - { - title: "Exists (field)" - input: log: field: 1 - source: #""" - exists(.field) - """# - return: true - }, - { - title: "Exists (array element)" - input: log: array: [1, 2, 3] - source: #""" - exists(.array[2]) - """# - return: true - }, - ] -} diff --git a/website/cue/reference/remap/functions/filter.cue b/website/cue/reference/remap/functions/filter.cue deleted file mode 100644 index 690d4b845234a..0000000000000 --- a/website/cue/reference/remap/functions/filter.cue +++ /dev/null @@ -1,50 +0,0 @@ -package metadata - -remap: functions: filter: { - category: "Enumerate" - description: """ - Filter elements from a collection. - - This function currently *does not* support recursive iteration. - - The function uses the function closure syntax to allow reading - the key-value or index-value combination for each item in the - collection. - - The same scoping rules apply to closure blocks as they do for - regular blocks. This means that any variable defined in parent scopes - is accessible, and mutations to those variables are preserved, - but any new variables instantiated in the closure block are - unavailable outside of the block. - - See the examples below to learn about the closure syntax. - """ - - arguments: [ - { - name: "value" - description: "The array or object to filter." 
- required: true - type: ["array", "object"] - }, - ] - internal_failure_reasons: [] - return: { - types: ["array", "object"] - } - examples: [ - { - title: "Filter elements" - input: log: { - tags: ["foo", "bar", "foo", "baz"] - } - source: #""" - filter(array!(.tags)) -> |_index, value| { - # keep any elements that aren't equal to "foo" - value != "foo" - } - """# - return: ["bar", "baz"] - }, - ] -} diff --git a/website/cue/reference/remap/functions/find.cue b/website/cue/reference/remap/functions/find.cue deleted file mode 100644 index 5cf6bd3ab1283..0000000000000 --- a/website/cue/reference/remap/functions/find.cue +++ /dev/null @@ -1,64 +0,0 @@ -package metadata - -remap: functions: find: { - category: "String" - description: """ - Determines from left to right the start position of the first found element in `value` - that matches `pattern`. Returns `-1` if not found. - """ - - arguments: [ - { - name: "value" - description: "The string to find the pattern in." - required: true - type: ["string"] - }, - { - name: "pattern" - description: "The regular expression or string pattern to match against." - required: true - type: ["regex", "string"] - }, - { - name: "from" - description: "Offset to start searching." 
- required: false - default: 0 - type: ["integer"] - }, - ] - internal_failure_reasons: [] - return: types: ["integer"] - - examples: [ - { - title: "Match text" - source: #""" - find("foobar", "foo") - """# - return: 0 - }, - { - title: "Match regex" - source: #""" - find("foobar", r'b.r') - """# - return: 3 - }, - { - title: "No matches" - source: #""" - find("foobar", "baz") - """# - return: null - }, - { - title: "With an offset" - source: #""" - find("foobarfoobarfoo", "bar", 4) - """# - return: 9 - }, - ] -} diff --git a/website/cue/reference/remap/functions/find_enrichment_table_records.cue b/website/cue/reference/remap/functions/find_enrichment_table_records.cue deleted file mode 100644 index 1848f163edd4d..0000000000000 --- a/website/cue/reference/remap/functions/find_enrichment_table_records.cue +++ /dev/null @@ -1,93 +0,0 @@ -package metadata - -remap: functions: find_enrichment_table_records: { - category: "Enrichment" - description: """ - Searches an [enrichment table](\(urls.enrichment_tables_concept)) for rows that match the - provided condition. - - \(remap._enrichment_table_explainer) - """ - - arguments: [ - { - name: "table" - description: "The [enrichment table](\(urls.enrichment_tables_concept)) to search." - required: true - type: ["string"] - }, - { - name: "condition" - description: """ - The condition to search on. Since the condition is used at boot time to create - indices into the data, these conditions must be statically defined. - """ - required: true - type: ["object"] - }, - { - name: "select" - description: """ - A subset of fields from the enrichment table to return. If not specified, - all fields are returned. - """ - required: false - type: ["array"] - }, - { - name: "case_sensitive" - description: "Whether text fields need to match cases exactly." 
- required: false - type: ["boolean"] - default: true - }, - ] - internal_failure_reasons: [] - return: types: ["array"] - - examples: [ - { - title: "Exact match" - source: #""" - find_enrichment_table_records!("test", - { - "surname": "smith", - }, - case_sensitive: false) - """# - return: [{"id": 1, "firstname": "Bob", "surname": "Smith"}, - {"id": 2, "firstname": "Fred", "surname": "Smith"}, - ] - }, - { - title: "Wildcard match" - source: #""" - find_enrichment_table_records!("test", - { - "firstname": "Bob", - }, - wildcard: "fred", - case_sensitive: false) - """# - return: [{"id": 1, "firstname": "Bob", "surname": "Smith"}, - {"id": 2, "firstname": "Fred", "surname": "Smith"}, - ] - }, - { - title: "Date range search" - source: #""" - find_enrichment_table_records!("test", - { - "surname": "Smith", - "date_of_birth": { - "from": t'1985-01-01T00:00:00Z', - "to": t'1985-12-31T00:00:00Z' - } - }) - """# - return: [{"id": 1, "firstname": "Bob", "surname": "Smith"}, - {"id": 2, "firstname": "Fred", "surname": "Smith"}, - ] - }, - ] -} diff --git a/website/cue/reference/remap/functions/find_vector_metrics.cue b/website/cue/reference/remap/functions/find_vector_metrics.cue deleted file mode 100644 index dbfbd230a859d..0000000000000 --- a/website/cue/reference/remap/functions/find_vector_metrics.cue +++ /dev/null @@ -1,48 +0,0 @@ -package metadata - -remap: functions: find_vector_metrics: { - category: "Metrics" - description: """ - Searches internal Vector metrics by name and optionally by tags. Returns all matching - metrics. - - \(remap._vector_metrics_explainer) - """ - - arguments: [ - { - name: "key" - description: "The metric name to search." - required: true - type: ["string"] - }, - { - name: "tags" - description: """ - Tags to filter the results on. Values in this object support wildcards ('*') to - match on parts of the tag value. 
- """ - required: false - type: ["object"] - }, - ] - internal_failure_reasons: [] - return: types: ["array"] - - examples: [ - { - title: "Find vector internal metrics matching the name" - source: #""" - find_vector_metrics("utilization") - """# - return: [{"name": "utilization", "tags": {"component_id": ["test"]}, "type": "gauge", "kind": "absolute", "value": 0.5}] - }, - { - title: "Find vector internal metrics matching the name and tags" - source: #""" - find_vector_metrics("utilization", tags: {"component_id": "test"}) - """# - return: [{"name": "utilization", "tags": {"component_id": ["test"]}, "type": "gauge", "kind": "absolute", "value": 0.5}] - }, - ] -} diff --git a/website/cue/reference/remap/functions/flatten.cue b/website/cue/reference/remap/functions/flatten.cue deleted file mode 100644 index 13f86f79a2318..0000000000000 --- a/website/cue/reference/remap/functions/flatten.cue +++ /dev/null @@ -1,60 +0,0 @@ -package metadata - -remap: functions: flatten: { - category: "Enumerate" - description: #""" - Flattens the `value` into a single-level representation. - """# - - arguments: [ - { - name: "value" - description: "The array or object to flatten." - required: true - type: ["array", "object"] - }, - { - name: "separator" - description: "The separator to join nested keys" - required: false - default: "." 
- type: ["string"] - }, - ] - internal_failure_reasons: [] - return: { - types: ["array", "object"] - rules: [ - "The return type matches the `value` type.", - ] - } - - examples: [ - { - title: "Flatten array" - source: #""" - flatten([1, [2, 3, 4], [5, [6, 7], 8], 9]) - """# - return: [1, 2, 3, 4, 5, 6, 7, 8, 9] - }, - { - title: "Flatten object" - source: #""" - flatten({ - "parent1": { - "child1": 1, - "child2": 2 - }, - "parent2": { - "child3": 3 - } - }) - """# - return: { - "parent1.child1": 1 - "parent1.child2": 2 - "parent2.child3": 3 - } - }, - ] -} diff --git a/website/cue/reference/remap/functions/float.cue b/website/cue/reference/remap/functions/float.cue deleted file mode 100644 index 122390344dcfa..0000000000000 --- a/website/cue/reference/remap/functions/float.cue +++ /dev/null @@ -1,38 +0,0 @@ -package metadata - -remap: functions: float: { - category: "Type" - description: """ - Returns `value` if it is a float, otherwise returns an error. This enables the type checker to guarantee that the - returned value is a float and can be used in any function that expects a float. - """ - - arguments: [ - { - name: "value" - description: "The value to check if it is a float." - required: true - type: ["any"] - }, - ] - internal_failure_reasons: [ - "`value` is not a float.", - ] - return: { - types: ["float"] - rules: [ - #"Returns the `value` if it's a float."#, - #"Raises an error if not a float."#, - ] - } - examples: [ - { - title: "Declare a float type" - input: log: value: 42.0 - source: #""" - float!(.value) - """# - return: input.log.value - }, - ] -} diff --git a/website/cue/reference/remap/functions/floor.cue b/website/cue/reference/remap/functions/floor.cue deleted file mode 100644 index 3c9bb578297c7..0000000000000 --- a/website/cue/reference/remap/functions/floor.cue +++ /dev/null @@ -1,48 +0,0 @@ -package metadata - -remap: functions: floor: { - category: "Number" - description: #""" - Rounds the `value` down to the specified `precision`. 
- """# - - arguments: [ - { - name: "value" - description: "The number to round down." - required: true - type: ["integer", "float"] - }, - { - name: "precision" - description: "The number of decimal places to round to." - required: false - default: 0 - type: ["integer"] - }, - ] - internal_failure_reasons: [] - return: { - types: ["integer", "float"] - rules: [ - "Returns an integer if `precision` is `0` (this is the default). Returns a float otherwise.", - ] - } - - examples: [ - { - title: "Round a number down (without precision)" - source: #""" - floor(4.345) - """# - return: 4.0 - }, - { - title: "Round a number down (with precision)" - source: #""" - floor(4.345, precision: 2) - """# - return: 4.34 - }, - ] -} diff --git a/website/cue/reference/remap/functions/for_each.cue b/website/cue/reference/remap/functions/for_each.cue deleted file mode 100644 index 2d7697e1c5462..0000000000000 --- a/website/cue/reference/remap/functions/for_each.cue +++ /dev/null @@ -1,57 +0,0 @@ -package metadata - -remap: functions: for_each: { - category: "Enumerate" - description: """ - Iterate over a collection. - - This function currently *does not* support recursive iteration. - - The function uses the "function closure syntax" to allow reading - the key/value or index/value combination for each item in the - collection. - - The same scoping rules apply to closure blocks as they do for - regular blocks. This means that any variable defined in parent scopes - is accessible, and mutations to those variables are preserved, - but any new variables instantiated in the closure block are - unavailable outside of the block. - - See the examples below to learn about the closure syntax. - """ - - arguments: [ - { - name: "value" - description: "The array or object to iterate." 
- required: true - type: ["array", "object"] - }, - ] - internal_failure_reasons: [] - return: { - types: ["null"] - } - examples: [ - { - title: "Tally elements" - input: log: { - tags: ["foo", "bar", "foo", "baz"] - } - source: #""" - tally = {} - for_each(array!(.tags)) -> |_index, value| { - # Get the current tally for the `value`, or - # set to `0`. - count = int(get!(tally, [value])) ?? 0 - - # Increment the tally for the value by `1`. - tally = set!(tally, [value], count + 1) - } - - tally - """# - return: {"foo": 2, "bar": 1, "baz": 1} - }, - ] -} diff --git a/website/cue/reference/remap/functions/format_int.cue b/website/cue/reference/remap/functions/format_int.cue deleted file mode 100644 index dfa689ba42d5f..0000000000000 --- a/website/cue/reference/remap/functions/format_int.cue +++ /dev/null @@ -1,45 +0,0 @@ -package metadata - -remap: functions: format_int: { - category: "Number" - description: #""" - Formats the integer `value` into a string representation using the given base/radix. - """# - - arguments: [ - { - name: "value" - description: "The number to format." - required: true - type: ["integer"] - }, - { - name: "base" - description: "The base to format the number in. Must be between 2 and 36 (inclusive)." 
- required: false - type: ["integer"] - default: 10 - }, - ] - internal_failure_reasons: [ - "The base is not between 2 and 36.", - ] - return: types: ["string"] - - examples: [ - { - title: "Format as a hexadecimal integer" - source: #""" - format_int!(42, 16) - """# - return: "2a" - }, - { - title: "Format as a negative hexadecimal integer" - source: #""" - format_int!(-42, 16) - """# - return: "-2a" - }, - ] -} diff --git a/website/cue/reference/remap/functions/format_number.cue b/website/cue/reference/remap/functions/format_number.cue deleted file mode 100644 index c785f603790f4..0000000000000 --- a/website/cue/reference/remap/functions/format_number.cue +++ /dev/null @@ -1,49 +0,0 @@ -package metadata - -remap: functions: format_number: { - category: "Number" - description: #""" - Formats the `value` into a string representation of the number. - """# - - arguments: [ - { - name: "value" - description: "The number to format as a string." - required: true - type: ["integer", "float"] - }, - { - name: "scale" - description: "The number of decimal places to display." - required: false - type: ["integer"] - }, - { - name: "decimal_separator" - description: "The character to use between the whole and decimal parts of the number." - required: false - type: ["string"] - default: "." - }, - { - name: "grouping_separator" - description: "The character to use between each thousands part of the number." 
- required: false - type: ["string"] - default: "," - }, - ] - internal_failure_reasons: [] - return: types: ["string"] - - examples: [ - { - title: "Format a number (3 decimals)" - source: #""" - format_number(1234567.89, 3, decimal_separator: ".", grouping_separator: ",") - """# - return: "1,234,567.890" - }, - ] -} diff --git a/website/cue/reference/remap/functions/format_timestamp.cue b/website/cue/reference/remap/functions/format_timestamp.cue deleted file mode 100644 index 21db108fd17a0..0000000000000 --- a/website/cue/reference/remap/functions/format_timestamp.cue +++ /dev/null @@ -1,48 +0,0 @@ -package metadata - -remap: functions: format_timestamp: { - category: "Timestamp" - description: #""" - Formats `value` into a string representation of the timestamp. - """# - - arguments: [ - { - name: "value" - description: "The timestamp to format as text." - required: true - type: ["timestamp"] - }, - { - name: "format" - description: "The format string as described by the [Chrono library](\(urls.chrono_time_formats))." - required: true - type: ["string"] - }, - { - name: "timezone" - description: "The timezone to use when formatting the timestamp. The parameter uses the TZ identifier or `local`." 
- required: false - type: ["string"] - }, - ] - internal_failure_reasons: [] - return: types: ["string"] - - examples: [ - { - title: "Format a timestamp (ISO8601/RFC 3339)" - source: #""" - format_timestamp!(t'2020-10-21T16:00:00Z', format: "%+") - """# - return: "2020-10-21T16:00:00+00:00" - }, - { - title: "Format a timestamp (custom)" - source: #""" - format_timestamp!(t'2020-10-21T16:00:00Z', format: "%v %R") - """# - return: "21-Oct-2020 16:00" - }, - ] -} diff --git a/website/cue/reference/remap/functions/from_unix_timestamp.cue b/website/cue/reference/remap/functions/from_unix_timestamp.cue deleted file mode 100644 index a4eb4a1b08d38..0000000000000 --- a/website/cue/reference/remap/functions/from_unix_timestamp.cue +++ /dev/null @@ -1,58 +0,0 @@ -package metadata - -remap: functions: from_unix_timestamp: { - category: "Convert" - description: """ - Converts the `value` integer from a [Unix timestamp](\(urls.unix_timestamp)) to a VRL `timestamp`. - - Converts from the number of seconds since the Unix epoch by default. To convert from milliseconds or nanoseconds, set the `unit` argument to `milliseconds` or `nanoseconds`. - """ - - arguments: [ - { - name: "value" - description: "The Unix timestamp to convert." - required: true - type: ["integer"] - }, - { - name: "unit" - description: "The time unit." 
- type: ["string"] - required: false - enum: { - seconds: "Express Unix time in seconds" - milliseconds: "Express Unix time in milliseconds" - nanoseconds: "Express Unix time in nanoseconds" - microseconds: "Express Unix time in microseconds" - } - default: "seconds" - }, - ] - internal_failure_reasons: [] - return: types: ["timestamp"] - - examples: [ - { - title: "Convert from a Unix timestamp (seconds)" - source: #""" - from_unix_timestamp!(5) - """# - return: "1970-01-01T00:00:05Z" - }, - { - title: "Convert from a Unix timestamp (milliseconds)" - source: #""" - from_unix_timestamp!(5000, unit: "milliseconds") - """# - return: "1970-01-01T00:00:05Z" - }, - { - title: "Convert from a Unix timestamp (nanoseconds)" - source: #""" - from_unix_timestamp!(5000, unit: "nanoseconds") - """# - return: "1970-01-01T00:00:00.000005Z" - }, - ] -} diff --git a/website/cue/reference/remap/functions/get.cue b/website/cue/reference/remap/functions/get.cue deleted file mode 100644 index bbdb3060e57df..0000000000000 --- a/website/cue/reference/remap/functions/get.cue +++ /dev/null @@ -1,57 +0,0 @@ -package metadata - -remap: functions: get: { - category: "Path" - description: """ - Dynamically get the value of a given path. - - If you know the path you want to look up, use - static paths such as `.foo.bar[1]` to get the value of that - path. However, if you do not know the path names, - use the dynamic `get` function to get the requested - value. - """ - - arguments: [ - { - name: "value" - description: "The object or array to query." - required: true - type: ["object", "array"] - }, - { - name: "path" - description: "An array of path segments to look for the value." 
- required: true - type: ["array"] - }, - ] - internal_failure_reasons: [ - #"The `path` segment must be a string or an integer."#, - ] - return: types: ["any"] - - examples: [ - { - title: "single-segment top-level field" - source: #""" - get!(value: { "foo": "bar" }, path: ["foo"]) - """# - return: "bar" - }, - { - title: "multi-segment nested field" - source: #""" - get!(value: { "foo": { "bar": "baz" } }, path: ["foo", "bar"]) - """# - return: "baz" - }, - { - title: "array indexing" - source: #""" - get!(value: ["foo", "bar", "baz"], path: [-2]) - """# - return: "bar" - }, - ] -} diff --git a/website/cue/reference/remap/functions/get_enrichment_table_record.cue b/website/cue/reference/remap/functions/get_enrichment_table_record.cue deleted file mode 100644 index 9aae8266392b0..0000000000000 --- a/website/cue/reference/remap/functions/get_enrichment_table_record.cue +++ /dev/null @@ -1,94 +0,0 @@ -package metadata - -remap: functions: get_enrichment_table_record: { - category: "Enrichment" - description: """ - Searches an [enrichment table](\(urls.enrichment_tables_concept)) for a row that matches the - provided condition. A single row must be matched. If no rows are found or more than one row is - found, an error is returned. - - \(remap._enrichment_table_explainer) - """ - - arguments: [ - { - name: "table" - description: "The [enrichment table](\(urls.enrichment_tables_concept)) to search." - required: true - type: ["string"] - }, - { - name: "condition" - description: """ - The condition to search on. Since the condition is used at boot time to create - indices into the data, these conditions must be statically defined. - """ - required: true - type: ["object"] - }, - { - name: "select" - description: """ - A subset of fields from the enrichment table to return. If not specified, - all fields are returned. - """ - required: false - type: ["array"] - }, - { - name: "case_sensitive" - description: "Whether the text fields match the case exactly." 
- required: false - type: ["boolean"] - default: true - }, - ] - internal_failure_reasons: [ - "The row is not found.", - "Multiple rows are found that match the condition.", - ] - return: types: ["object"] - - examples: [ - { - title: "Exact match" - source: #""" - get_enrichment_table_record!("test", - { - "surname": "bob", - "firstname": "John" - }, - case_sensitive: false) - """# - return: {"id": 1, "firstname": "Bob", "surname": "Smith"} - }, - { - title: "Wildcard match" - source: #""" - find_enrichment_table_records!("test", - { - "firstname": "Bob", - }, - wildcard: "fred", - case_sensitive: false) - """# - return: [{"id": 1, "firstname": "Bob", "surname": "Smith"}, - {"id": 2, "firstname": "Fred", "surname": "Smith"}, - ] - }, - { - title: "Date range search" - source: #""" - get_enrichment_table_record!("test", - { - "surname": "Smith", - "date_of_birth": { - "from": t'1985-01-01T00:00:00Z', - "to": t'1985-12-31T00:00:00Z' - } - }) - """# - return: {"id": 1, "firstname": "Bob", "surname": "Smith"} - }, - ] -} diff --git a/website/cue/reference/remap/functions/get_env_var.cue b/website/cue/reference/remap/functions/get_env_var.cue deleted file mode 100644 index b0cae2474dc17..0000000000000 --- a/website/cue/reference/remap/functions/get_env_var.cue +++ /dev/null @@ -1,32 +0,0 @@ -package metadata - -remap: functions: get_env_var: { - category: "System" - description: """ - Returns the value of the environment variable specified by `name`. - """ - - arguments: [ - { - name: "name" - description: "The name of the environment variable." 
- required: true - type: ["string"] - }, - ] - internal_failure_reasons: [ - "Environment variable `name` does not exist.", - "The value of environment variable `name` is not valid Unicode", - ] - return: types: ["string"] - - examples: [ - { - title: "Get an environment variable" - source: #""" - get_env_var!("HOME") - """# - return: "/root" - }, - ] -} diff --git a/website/cue/reference/remap/functions/get_hostname.cue b/website/cue/reference/remap/functions/get_hostname.cue deleted file mode 100644 index 474dc6949f113..0000000000000 --- a/website/cue/reference/remap/functions/get_hostname.cue +++ /dev/null @@ -1,25 +0,0 @@ -package metadata - -remap: functions: get_hostname: { - category: "System" - description: """ - Returns the local system's hostname. - """ - - arguments: [] - internal_failure_reasons: [ - "Internal hostname resolution failed.", - ] - return: types: ["string"] - - examples: [ - { - title: "Get hostname" - input: log: {} - source: #""" - .hostname = get_hostname!() - """# - output: log: hostname: "localhost.localdomain" - }, - ] -} diff --git a/website/cue/reference/remap/functions/get_secret.cue b/website/cue/reference/remap/functions/get_secret.cue deleted file mode 100644 index 8cd5e12844448..0000000000000 --- a/website/cue/reference/remap/functions/get_secret.cue +++ /dev/null @@ -1,31 +0,0 @@ -package metadata - -remap: functions: get_secret: { - category: "Event" - description: """ - Returns the value of the given secret from an event. - """ - - arguments: [ - { - name: "key" - description: """ - The name of the secret. 
- """ - required: true - type: ["string"] - }, - ] - internal_failure_reasons: [] - return: types: ["string"] - - examples: [ - { - title: "Get the Datadog API key from the event metadata" - source: #""" - get_secret("datadog_api_key") - """# - return: "secret value" - }, - ] -} diff --git a/website/cue/reference/remap/functions/get_timezone_name.cue b/website/cue/reference/remap/functions/get_timezone_name.cue deleted file mode 100644 index e094b1208a5b1..0000000000000 --- a/website/cue/reference/remap/functions/get_timezone_name.cue +++ /dev/null @@ -1,31 +0,0 @@ -package metadata - -remap: functions: get_timezone_name: { - category: "System" - description: """ - Returns the name of the timezone in the Vector configuration (see - [global configuration options](\(urls.vector_configuration_global))). - If the configuration is set to `local`, then it attempts to - determine the name of the timezone from the host OS. If this - is not possible, then it returns the fixed offset of the - local timezone for the current time in the format `"[+-]HH:MM"`, - for example, `"+02:00"`. - """ - - arguments: [] - internal_failure_reasons: [ - "Retrieval of local timezone information failed.", - ] - return: types: ["string"] - - examples: [ - { - title: "Get the IANA name of Vector's timezone" - input: log: {} - source: #""" - .vector_timezone = get_timezone_name!() - """# - output: log: vector_timezone: "UTC" - }, - ] -} diff --git a/website/cue/reference/remap/functions/get_vector_metric.cue b/website/cue/reference/remap/functions/get_vector_metric.cue deleted file mode 100644 index 2ecb5ee6948f5..0000000000000 --- a/website/cue/reference/remap/functions/get_vector_metric.cue +++ /dev/null @@ -1,48 +0,0 @@ -package metadata - -remap: functions: get_vector_metric: { - category: "Metrics" - description: """ - Searches internal Vector metrics by name and optionally by tags. Returns the first matching - metric. 
- - \(remap._vector_metrics_explainer) - """ - - arguments: [ - { - name: "key" - description: "The metric name to search." - required: true - type: ["string"] - }, - { - name: "tags" - description: """ - Tags to filter the results on. Values in this object support wildcards ('*') to - match on parts of the tag value. - """ - required: false - type: ["object"] - }, - ] - internal_failure_reasons: [] - return: types: ["object"] - - examples: [ - { - title: "Get a vector internal metric matching the name" - source: #""" - get_vector_metric("utilization") - """# - return: {"name": "utilization", "tags": {"component_id": ["test"]}, "type": "gauge", "kind": "absolute", "value": 0.5} - }, - { - title: "Get a vector internal metric matching the name and tags" - source: #""" - get_vector_metric("utilization", tags: {"component_id": "test"}) - """# - return: {"name": "utilization", "tags": {"component_id": ["test"]}, "type": "gauge", "kind": "absolute", "value": 0.5} - }, - ] -} diff --git a/website/cue/reference/remap/functions/haversine.cue b/website/cue/reference/remap/functions/haversine.cue deleted file mode 100644 index ce186df426fea..0000000000000 --- a/website/cue/reference/remap/functions/haversine.cue +++ /dev/null @@ -1,72 +0,0 @@ -package metadata - -remap: functions: haversine: { - category: "Map" - description: """ - Calculates [haversine](\(urls.haversine)) distance and bearing between two points. - Results are available in kilometers or miles. - """ - - arguments: [ - { - name: "latitude1" - description: "Latitude of the first point." - required: true - type: ["float"] - }, - { - name: "longitude1" - description: "Longitude of the first point." - required: true - type: ["float"] - }, - { - name: "latitude2" - description: "Latitude of the second point." - required: true - type: ["float"] - }, - { - name: "longitude2" - description: "Longitude of the second point." 
- required: true - type: ["float"] - }, - { - name: "measurement" - description: "Measurement system to use for resulting distance." - required: false - type: ["string"] - default: "kilometers" - enum: { - kilometers: "Use kilometers for the resulting distance." - miles: "Use miles for the resulting distance." - } - }, - ] - internal_failure_reasons: [] - return: types: ["object"] - - examples: [ - { - title: "Haversine in kilometers" - source: #""" - haversine(0.0, 0.0, 10.0, 10.0) - """# - return: { - distance: 1568.5227233 - bearing: 44.561 - } - }, - { - title: "Haversine in miles" - source: #""" - haversine(0.0, 0.0, 10.0, 10.0, "miles") - """# - return: { - distance: 974.6348468 - bearing: 44.561 - } - }, - ] -} diff --git a/website/cue/reference/remap/functions/hmac.cue b/website/cue/reference/remap/functions/hmac.cue deleted file mode 100644 index 5d6972352f7c6..0000000000000 --- a/website/cue/reference/remap/functions/hmac.cue +++ /dev/null @@ -1,75 +0,0 @@ -package metadata - -remap: functions: hmac: { - category: "Cryptography" - description: """ - Calculates a [HMAC](\(urls.hmac)) of the `value` using the given `key`. - The hashing `algorithm` used can be optionally specified. - - For most use cases, the resulting bytestream should be encoded into a hex or base64 - string using either [encode_base16](\(urls.vrl_functions)/#encode_base16) or - [encode_base64](\(urls.vrl_functions)/#encode_base64). - - This function is infallible if either the default `algorithm` value or a recognized-valid compile-time - `algorithm` string literal is used. Otherwise, it is fallible. - """ - - arguments: [ - { - name: "value" - description: "The string to calculate the HMAC for." - required: true - type: ["string"] - }, - { - name: "key" - description: "The string to use as the cryptographic key." - required: true - type: ["string"] - }, - { - name: "algorithm" - description: "The hashing algorithm to use." 
- enum: { - "SHA1": "SHA1 algorithm" - "SHA-224": "SHA-224 algorithm" - "SHA-256": "SHA-256 algorithm" - "SHA-384": "SHA-384 algorithm" - "SHA-512": "SHA-512 algorithm" - } - required: false - default: "SHA-256" - type: ["string"] - }, - ] - internal_failure_reasons: [] - return: types: ["string"] - - examples: [ - { - title: "Calculate message HMAC (defaults: SHA-256), encoding to a base64 string" - source: #""" - encode_base64(hmac("Hello there", "super-secret-key")) - """# - return: "eLGE8YMviv85NPXgISRUZxstBNSU47JQdcXkUWcClmI=" - }, - { - title: "Calculate message HMAC using SHA-224, encoding to a hex-encoded string" - source: #""" - encode_base16(hmac("Hello there", "super-secret-key", algorithm: "SHA-224")) - """# - return: "42fccbc2b7d22a143b92f265a8046187558a94d11ddbb30622207e90" - }, - { - title: "Calculate message HMAC using a variable hash algorithm" - source: #""" - .hash_algo = "SHA-256" - hmac_bytes, err = hmac("Hello there", "super-secret-key", algorithm: .hash_algo) - if err == null { - .hmac = encode_base16(hmac_bytes) - } - """# - return: "78b184f1832f8aff3934f5e0212454671b2d04d494e3b25075c5e45167029662" - }, - ] -} diff --git a/website/cue/reference/remap/functions/includes.cue b/website/cue/reference/remap/functions/includes.cue deleted file mode 100644 index 2719c42e1401e..0000000000000 --- a/website/cue/reference/remap/functions/includes.cue +++ /dev/null @@ -1,35 +0,0 @@ -package metadata - -remap: functions: includes: { - category: "Enumerate" - description: """ - Determines whether the `value` array includes the specified `item`. - """ - - arguments: [ - { - name: "value" - description: "The array." - required: true - type: ["array"] - }, - { - name: "item" - description: "The item to check." 
- required: true - type: ["any"] - }, - ] - internal_failure_reasons: [] - return: types: ["boolean"] - - examples: [ - { - title: "Array includes" - source: #""" - includes(["apple", "orange", "banana"], "banana") - """# - return: true - }, - ] -} diff --git a/website/cue/reference/remap/functions/int.cue b/website/cue/reference/remap/functions/int.cue deleted file mode 100644 index 6d754a63ffd4a..0000000000000 --- a/website/cue/reference/remap/functions/int.cue +++ /dev/null @@ -1,38 +0,0 @@ -package metadata - -remap: functions: int: { - category: "Type" - description: """ - Returns `value` if it is an integer, otherwise returns an error. This enables the type checker to guarantee that the - returned value is an integer and can be used in any function that expects an integer. - """ - - arguments: [ - { - name: "value" - description: "The value to check if it is an integer." - required: true - type: ["any"] - }, - ] - internal_failure_reasons: [ - "`value` is not an integer.", - ] - return: { - types: ["integer"] - rules: [ - #"Returns the `value` if it's an integer."#, - #"Raises an error if not an integer."#, - ] - } - examples: [ - { - title: "Declare an integer type" - input: log: value: 42 - source: #""" - int!(.value) - """# - return: input.log.value - }, - ] -} diff --git a/website/cue/reference/remap/functions/ip_aton.cue b/website/cue/reference/remap/functions/ip_aton.cue deleted file mode 100644 index ec0e9faa32b32..0000000000000 --- a/website/cue/reference/remap/functions/ip_aton.cue +++ /dev/null @@ -1,34 +0,0 @@ -package metadata - -remap: functions: ip_aton: { - category: "IP" - description: """ - Converts IPv4 address in numbers-and-dots notation into network-order - bytes represented as an integer. - - This behavior mimics [inet_aton](\(urls.ip_aton)). - """ - - arguments: [ - { - name: "value" - description: "The IP address to convert to binary." 
- required: true - type: ["string"] - }, - ] - internal_failure_reasons: [ - "`value` is not a valid IPv4 address.", - ] - return: types: ["integer"] - - examples: [ - { - title: "IPv4 to integer" - source: #""" - ip_aton!("1.2.3.4") - """# - return: 16909060 - }, - ] -} diff --git a/website/cue/reference/remap/functions/ip_cidr_contains.cue b/website/cue/reference/remap/functions/ip_cidr_contains.cue deleted file mode 100644 index ea435ec498c74..0000000000000 --- a/website/cue/reference/remap/functions/ip_cidr_contains.cue +++ /dev/null @@ -1,52 +0,0 @@ -package metadata - -remap: functions: ip_cidr_contains: { - category: "IP" - description: """ - Determines whether the `ip` is contained in the block referenced by the `cidr`. - """ - - arguments: [ - { - name: "cidr" - description: "The CIDR mask (v4 or v6)." - required: true - type: ["string", "array"] - }, - { - name: "ip" - description: "The IP address (v4 or v6)." - required: true - type: ["string"] - }, - ] - internal_failure_reasons: [ - "`cidr` is not a valid CIDR.", - "`ip` is not a valid IP address.", - ] - return: types: ["boolean"] - - examples: [ - { - title: "IPv4 contains CIDR" - source: #""" - ip_cidr_contains!("192.168.0.0/16", "192.168.10.32") - """# - return: true - }, - { - title: "IPv4 is private" - source: #""" - ip_cidr_contains!(["10.0.0.0/8", "172.16.0.0/12", "192.168.0.0/16"], "192.168.10.32") - """# - return: true - }, - { - title: "IPv6 contains CIDR" - source: #""" - ip_cidr_contains!("2001:4f8:4:ba::/64", "2001:4f8:4:ba:2e0:81ff:fe22:d1f1") - """# - return: true - }, - ] -} diff --git a/website/cue/reference/remap/functions/ip_ntoa.cue b/website/cue/reference/remap/functions/ip_ntoa.cue deleted file mode 100644 index 26cb3aa300835..0000000000000 --- a/website/cue/reference/remap/functions/ip_ntoa.cue +++ /dev/null @@ -1,34 +0,0 @@ -package metadata - -remap: functions: ip_ntoa: { - category: "IP" - description: """ - Converts numeric representation of IPv4 address in network-order 
bytes - to numbers-and-dots notation. - - This behavior mimics [inet_ntoa](\(urls.ip_ntoa)). - """ - - arguments: [ - { - name: "value" - description: "The integer representation of an IPv4 address." - required: true - type: ["string"] - }, - ] - internal_failure_reasons: [ - "`value` cannot fit in an unsigned 32-bit integer.", - ] - return: types: ["string"] - - examples: [ - { - title: "Integer to IPv4" - source: #""" - ip_ntoa!(16909060) - """# - return: "1.2.3.4" - }, - ] -} diff --git a/website/cue/reference/remap/functions/ip_ntop.cue b/website/cue/reference/remap/functions/ip_ntop.cue deleted file mode 100644 index 4389b8fd386d7..0000000000000 --- a/website/cue/reference/remap/functions/ip_ntop.cue +++ /dev/null @@ -1,52 +0,0 @@ -package metadata - -remap: functions: ip_ntop: { - category: "IP" - description: """ - Converts IPv4 and IPv6 addresses from binary to text form. - - This behavior mimics [inet_ntop](\(urls.ip_ntop)). - """ - - notices: [ - """ - The binary data for this function is not easily printable. - However, the results from functions such as `decode_base64` or - `decode_percent` can still be used correctly. - """, - ] - - arguments: [ - { - name: "value" - description: """ - The binary data to convert from. - For IPv4 addresses, it must be 4 bytes (32 bits) long. - For IPv6 addresses, it must be 16 bytes (128 bits) long. 
- """ - required: true - type: ["string"] - }, - ] - internal_failure_reasons: [ - "`value` must be of length 4 or 16 bytes.", - ] - return: types: ["string"] - - examples: [ - { - title: "Convert IPv4 address from bytes after decoding from Base64" - source: #""" - ip_ntop!(decode_base64!("wKgAAQ==")) - """# - return: "192.168.0.1" - }, - { - title: "Convert IPv6 address from bytes after decoding from Base64" - source: #""" - ip_ntop!(decode_base64!("IAENuIWjAAAAAIouA3BzNA==")) - """# - return: "2001:db8:85a3::8a2e:370:7334" - }, - ] -} diff --git a/website/cue/reference/remap/functions/ip_pton.cue b/website/cue/reference/remap/functions/ip_pton.cue deleted file mode 100644 index 2ec24fe4df180..0000000000000 --- a/website/cue/reference/remap/functions/ip_pton.cue +++ /dev/null @@ -1,51 +0,0 @@ -package metadata - -remap: functions: ip_pton: { - category: "IP" - description: """ - Converts IPv4 and IPv6 addresses from text to binary form. - - * The binary form of IPv4 addresses is 4 bytes (32 bits) long. - * The binary form of IPv6 addresses is 16 bytes (128 bits) long. - - This behavior mimics [inet_pton](\(urls.ip_pton)). - """ - - notices: [ - """ - The binary data from this function is not easily printable. - However, functions such as `encode_base64` or `encode_percent` can - still process it correctly. - """, - ] - - arguments: [ - { - name: "value" - description: "The IP address (v4 or v6) to convert to binary form." 
- required: true - type: ["string"] - }, - ] - internal_failure_reasons: [ - "`value` is not a valid IP (v4 or v6) address in text form.", - ] - return: types: ["string"] - - examples: [ - { - title: "Convert IPv4 address to bytes and encode to Base64" - source: #""" - encode_base64(ip_pton!("192.168.0.1")) - """# - return: "wKgAAQ==" - }, - { - title: "Convert IPv6 address to bytes and encode to Base64" - source: #""" - encode_base64(ip_pton!("2001:db8:85a3::8a2e:370:7334")) - """# - return: "IAENuIWjAAAAAIouA3BzNA==" - }, - ] -} diff --git a/website/cue/reference/remap/functions/ip_subnet.cue b/website/cue/reference/remap/functions/ip_subnet.cue deleted file mode 100644 index 42b0a45df69be..0000000000000 --- a/website/cue/reference/remap/functions/ip_subnet.cue +++ /dev/null @@ -1,54 +0,0 @@ -package metadata - -remap: functions: ip_subnet: { - category: "IP" - description: """ - Extracts the subnet address from the `ip` using the supplied `subnet`. - """ - notices: [ - """ - Works with both IPv4 and IPv6 addresses. The IP version for the mask must be the same as the supplied - address. - """, - ] - - arguments: [ - { - name: "ip" - description: "The IP address (v4 or v6)." - required: true - type: ["string"] - }, - { - name: "subnet" - description: #""" - The subnet to extract from the IP address. This can be either a prefix length like `/8` or a net mask - like `255.255.0.0`. The net mask can be either an IPv4 or IPv6 address. 
- """# - required: true - type: ["string"] - }, - ] - internal_failure_reasons: [ - "`ip` is not a valid IP address.", - "`subnet` is not a valid subnet.", - ] - return: types: ["string"] - - examples: [ - { - title: "IPv4 subnet" - source: #""" - ip_subnet!("192.168.10.32", "255.255.255.0") - """# - return: "192.168.10.0" - }, - { - title: "IPv6 subnet" - source: #""" - ip_subnet!("2404:6800:4003:c02::64", "/32") - """# - return: "2404:6800::" - }, - ] -} diff --git a/website/cue/reference/remap/functions/ip_to_ipv6.cue b/website/cue/reference/remap/functions/ip_to_ipv6.cue deleted file mode 100644 index d1eb75082d8f2..0000000000000 --- a/website/cue/reference/remap/functions/ip_to_ipv6.cue +++ /dev/null @@ -1,37 +0,0 @@ -package metadata - -remap: functions: ip_to_ipv6: { - category: "IP" - description: """ - Converts the `ip` to an IPv6 address. - """ - - arguments: [ - { - name: "ip" - description: "The IP address to convert to IPv6." - required: true - type: ["string"] - }, - ] - internal_failure_reasons: [ - "`ip` is not a valid IP address.", - ] - return: { - types: ["string"] - rules: [ - "The `ip` is returned unchanged if it's already an IPv6 address.", - "The `ip` is converted to an IPv6 address if it's an IPv4 address.", - ] - } - - examples: [ - { - title: "IPv4 to IPv6" - source: #""" - ip_to_ipv6!("192.168.10.32") - """# - return: "::ffff:192.168.10.32" - }, - ] -} diff --git a/website/cue/reference/remap/functions/ipv6_to_ipv4.cue b/website/cue/reference/remap/functions/ipv6_to_ipv4.cue deleted file mode 100644 index fab14bde1117a..0000000000000 --- a/website/cue/reference/remap/functions/ipv6_to_ipv4.cue +++ /dev/null @@ -1,41 +0,0 @@ -package metadata - -remap: functions: ipv6_to_ipv4: { - category: "IP" - description: """ - Converts the `ip` to an IPv4 address. `ip` is returned unchanged if it's already an IPv4 address. If `ip` is - currently an IPv6 address then it needs to be IPv4 compatible, otherwise an error is thrown. 
- """ - - arguments: [ - { - name: "ip" - description: "The IPv4-mapped IPv6 address to convert." - required: true - type: ["string"] - }, - ] - internal_failure_reasons: [ - "`ip` is not a valid IP address.", - "`ip` is an IPv6 address that is not compatible with IPv4.", - ] - return: { - types: ["string"] - rules: [ - """ - The `ip` is returned unchanged if it's already an IPv4 address. If it's an IPv6 address it must be IPv4 - compatible, otherwise an error is thrown. - """, - ] - } - - examples: [ - { - title: "IPv6 to IPv4" - source: #""" - ipv6_to_ipv4!("::ffff:192.168.0.1") - """# - return: "192.168.0.1" - }, - ] -} diff --git a/website/cue/reference/remap/functions/is_array.cue b/website/cue/reference/remap/functions/is_array.cue deleted file mode 100644 index a72eb1589c94f..0000000000000 --- a/website/cue/reference/remap/functions/is_array.cue +++ /dev/null @@ -1,42 +0,0 @@ -package metadata - -remap: functions: is_array: { - category: "Type" - description: """ - Check if the `value`'s type is an array. - """ - - arguments: [ - { - name: "value" - description: #"The value to check if it is an array."# - required: true - type: ["any"] - }, - ] - internal_failure_reasons: [] - return: { - types: ["boolean"] - rules: [ - #"Returns `true` if `value` is an array."#, - #"Returns `false` if `value` is anything else."#, - ] - } - - examples: [ - { - title: "Valid array" - source: """ - is_array([1, 2, 3]) - """ - return: true - }, - { - title: "Non-matching type" - source: """ - is_array("a string") - """ - return: false - }, - ] -} diff --git a/website/cue/reference/remap/functions/is_boolean.cue b/website/cue/reference/remap/functions/is_boolean.cue deleted file mode 100644 index e4e7ce3a3b610..0000000000000 --- a/website/cue/reference/remap/functions/is_boolean.cue +++ /dev/null @@ -1,42 +0,0 @@ -package metadata - -remap: functions: is_boolean: { - category: "Type" - description: """ - Check if the `value`'s type is a boolean. 
- """ - - arguments: [ - { - name: "value" - description: #"The value to check if it is a Boolean."# - required: true - type: ["any"] - }, - ] - internal_failure_reasons: [] - return: { - types: ["boolean"] - rules: [ - #"Returns `true` if `value` is a boolean."#, - #"Returns `false` if `value` is anything else."#, - ] - } - - examples: [ - { - title: "Valid boolean" - source: """ - is_boolean(false) - """ - return: true - }, - { - title: "Non-matching type" - source: """ - is_boolean("a string") - """ - return: false - }, - ] -} diff --git a/website/cue/reference/remap/functions/is_empty.cue b/website/cue/reference/remap/functions/is_empty.cue deleted file mode 100644 index 0aa22c1d2dd73..0000000000000 --- a/website/cue/reference/remap/functions/is_empty.cue +++ /dev/null @@ -1,49 +0,0 @@ -package metadata - -remap: functions: is_empty: { - category: "Type" - description: """ - Check if the object, array, or string has a length of `0`. - """ - - arguments: [ - { - name: "value" - description: #"The value to check."# - required: true - type: ["object", "array", "string"] - }, - ] - internal_failure_reasons: [] - return: { - types: ["boolean"] - rules: [ - #"Returns `true` if `value` is empty."#, - #"Returns `false` if `value` is non-empty."#, - ] - } - - examples: [ - { - title: "Empty array" - source: """ - is_empty([]) - """ - return: true - }, - { - title: "Non-empty string" - source: """ - is_empty("a string") - """ - return: false - }, - { - title: "Non-empty object" - source: """ - is_empty({"foo": "bar"}) - """ - return: false - }, - ] -} diff --git a/website/cue/reference/remap/functions/is_float.cue b/website/cue/reference/remap/functions/is_float.cue deleted file mode 100644 index a50f1c964907c..0000000000000 --- a/website/cue/reference/remap/functions/is_float.cue +++ /dev/null @@ -1,42 +0,0 @@ -package metadata - -remap: functions: is_float: { - category: "Type" - description: """ - Check if the `value`'s type is a float. 
- """ - - arguments: [ - { - name: "value" - description: #"The value to check if it is a float."# - required: true - type: ["any"] - }, - ] - internal_failure_reasons: [] - return: { - types: ["boolean"] - rules: [ - #"Returns `true` if `value` is a float."#, - #"Returns `false` if `value` is anything else."#, - ] - } - - examples: [ - { - title: "Valid float" - source: """ - is_float(0.577) - """ - return: true - }, - { - title: "Non-matching type" - source: """ - is_float("a string") - """ - return: false - }, - ] -} diff --git a/website/cue/reference/remap/functions/is_integer.cue b/website/cue/reference/remap/functions/is_integer.cue deleted file mode 100644 index 6148399aee575..0000000000000 --- a/website/cue/reference/remap/functions/is_integer.cue +++ /dev/null @@ -1,42 +0,0 @@ -package metadata - -remap: functions: is_integer: { - category: "Type" - description: """ - Check if the value`'s type is an integer. - """ - - arguments: [ - { - name: "value" - description: #"The value to check if it is an integer."# - required: true - type: ["any"] - }, - ] - internal_failure_reasons: [] - return: { - types: ["boolean"] - rules: [ - #"Returns `true` if `value` is an integer."#, - #"Returns `false` if `value` is anything else."#, - ] - } - - examples: [ - { - title: "Valid integer" - source: """ - is_integer(1) - """ - return: true - }, - { - title: "Non-matching type" - source: """ - is_integer("a string") - """ - return: false - }, - ] -} diff --git a/website/cue/reference/remap/functions/is_ipv4.cue b/website/cue/reference/remap/functions/is_ipv4.cue deleted file mode 100644 index fc3a20a94fcdd..0000000000000 --- a/website/cue/reference/remap/functions/is_ipv4.cue +++ /dev/null @@ -1,53 +0,0 @@ -package metadata - -remap: functions: is_ipv4: { - category: "IP" - description: """ - Check if the string is a valid IPv4 address or not. 
- - An [IPv4-mapped][https://datatracker.ietf.org/doc/html/rfc6890] or - [IPv4-compatible][https://datatracker.ietf.org/doc/html/rfc6890] IPv6 address is not considered - valid for the purpose of this function. - """ - - arguments: [ - { - name: "value" - description: "The IP address to check" - required: true - type: ["string"] - }, - ] - internal_failure_reasons: [] - return: { - types: ["boolean"] - rules: [ - #"Returns `true` if `value` is a valid IPv4 address."#, - #"Returns `false` if `value` is anything else."#, - ] - } - - examples: [ - { - title: "Valid IPv4 address" - source: """ - is_ipv4("10.0.102.37") - """ - return: true - }, - { - title: "Valid IPv6 address" - source: """ - is_ipv4("2001:0db8:85a3:0000:0000:8a2e:0370:7334") - """ - return: false - }, - { - title: "Arbitrary string" - source: """ - is_ipv4("foobar") - """ - return: false - }, - ] -} diff --git a/website/cue/reference/remap/functions/is_ipv6.cue b/website/cue/reference/remap/functions/is_ipv6.cue deleted file mode 100644 index 14761f3e3ace8..0000000000000 --- a/website/cue/reference/remap/functions/is_ipv6.cue +++ /dev/null @@ -1,49 +0,0 @@ -package metadata - -remap: functions: is_ipv6: { - category: "IP" - description: """ - Check if the string is a valid IPv6 address or not. 
- """ - - arguments: [ - { - name: "value" - description: "The IP address to check" - required: true - type: ["string"] - }, - ] - internal_failure_reasons: [] - return: { - types: ["boolean"] - rules: [ - #"Returns `true` if `value` is a valid IPv6 address."#, - #"Returns `false` if `value` is anything else."#, - ] - } - - examples: [ - { - title: "Valid IPv6 address" - source: """ - is_ipv6("2001:0db8:85a3:0000:0000:8a2e:0370:7334") - """ - return: true - }, - { - title: "Valid IPv4 address" - source: """ - is_ipv6("10.0.102.37") - """ - return: false - }, - { - title: "Arbitrary string" - source: """ - is_ipv6("foobar") - """ - return: false - }, - ] -} diff --git a/website/cue/reference/remap/functions/is_json.cue b/website/cue/reference/remap/functions/is_json.cue deleted file mode 100644 index 2a24a2e606e36..0000000000000 --- a/website/cue/reference/remap/functions/is_json.cue +++ /dev/null @@ -1,70 +0,0 @@ -package metadata - -remap: functions: is_json: { - category: "Type" - description: """ - Check if the string is a valid JSON document. - """ - - arguments: [ - { - name: "value" - description: #"The value to check if it is a valid JSON document."# - required: true - type: ["string"] - }, - { - name: "variant" - description: "The variant of the JSON type to explicitly check for." 
- enum: { - "object": "JSON object - {}" - "array": "JSON array - []" - "string": "JSON-formatted string values wrapped with quote marks" - "number": "Integer or float numbers" - "bool": "True or false" - "null": "Exact null value" - } - required: false - type: ["string"] - }, - ] - internal_failure_reasons: [] - return: { - types: ["boolean"] - rules: [ - #"Returns `true` if `value` is a valid JSON document."#, - #"Returns `false` if `value` is not JSON-formatted."#, - ] - } - - examples: [ - { - title: "Valid JSON object" - source: """ - is_json("{}") - """ - return: true - }, - { - title: "Non-valid value" - source: """ - is_json("{") - """ - return: false - }, - { - title: "Exact variant" - source: """ - is_json("{}", variant: "object") - """ - return: true - }, - { - title: "Non-valid exact variant" - source: """ - is_json("{}", variant: "array") - """ - return: false - }, - ] -} diff --git a/website/cue/reference/remap/functions/is_null.cue b/website/cue/reference/remap/functions/is_null.cue deleted file mode 100644 index bc85c5c378480..0000000000000 --- a/website/cue/reference/remap/functions/is_null.cue +++ /dev/null @@ -1,43 +0,0 @@ -package metadata - -remap: functions: is_null: { - category: "Type" - description: """ - Check if `value`'s type is `null`. For a more relaxed function, - see [`is_nullish`](\(urls.vrl_functions)#\(remap.functions.is_nullish.anchor)). 
- """ - - arguments: [ - { - name: "value" - description: #"The value to check if it is `null`."# - required: true - type: ["any"] - }, - ] - internal_failure_reasons: [] - return: { - types: ["boolean"] - rules: [ - #"Returns `true` if `value` is null."#, - #"Returns `false` if `value` is anything else."#, - ] - } - - examples: [ - { - title: "Null value" - source: """ - is_null(null) - """ - return: true - }, - { - title: "Non-matching type" - source: """ - is_null("a string") - """ - return: false - }, - ] -} diff --git a/website/cue/reference/remap/functions/is_nullish.cue b/website/cue/reference/remap/functions/is_nullish.cue deleted file mode 100644 index 7f3adaa0f4dcd..0000000000000 --- a/website/cue/reference/remap/functions/is_nullish.cue +++ /dev/null @@ -1,59 +0,0 @@ -package metadata - -remap: functions: is_nullish: { - category: "Type" - description: """ - Determines whether `value` is nullish. Returns `true` if the specified `value` is `null`, - an empty string, a string containing only whitespace, or the string `"-"`. Returns `false` otherwise. - """ - - notices: [ - """ - This function behaves inconsistently: it returns `false` for empty arrays (`[]`) and objects (`{}`), - but `true` for empty strings (`""`) and `null`. 
- """, - ] - - arguments: [ - { - name: "value" - description: #"The value to check for nullishness, for example, a useless value."# - required: true - type: ["any"] - }, - ] - internal_failure_reasons: [] - return: { - types: ["boolean"] - rules: [ - #"Returns `true` if `value` is `null`."#, - #"Returns `true` if `value` is `"-"`."#, - #"Returns `true` if `value` is whitespace as defined by [Unicode `White_Space` property](\#(urls.unicode_whitespace))."#, - #"Returns `false` if `value` is anything else."#, - ] - } - - examples: [ - { - title: "Null detection (blank string)" - source: """ - is_nullish("") - """ - return: true - }, - { - title: "Null detection (dash string)" - source: """ - is_nullish("-") - """ - return: true - }, - { - title: "Null detection (whitespace)" - source: """ - is_nullish("\n \n") - """ - return: true - }, - ] -} diff --git a/website/cue/reference/remap/functions/is_object.cue b/website/cue/reference/remap/functions/is_object.cue deleted file mode 100644 index 86258aa1ca2c4..0000000000000 --- a/website/cue/reference/remap/functions/is_object.cue +++ /dev/null @@ -1,42 +0,0 @@ -package metadata - -remap: functions: is_object: { - category: "Type" - description: """ - Check if `value`'s type is an object. 
- """ - - arguments: [ - { - name: "value" - description: #"The value to check if it is an object."# - required: true - type: ["any"] - }, - ] - internal_failure_reasons: [] - return: { - types: ["boolean"] - rules: [ - #"Returns `true` if `value` is an object."#, - #"Returns `false` if `value` is anything else."#, - ] - } - - examples: [ - { - title: "Valid object" - source: """ - is_object({"foo": "bar"}) - """ - return: true - }, - { - title: "Non-matching type" - source: """ - is_object("a string") - """ - return: false - }, - ] -} diff --git a/website/cue/reference/remap/functions/is_regex.cue b/website/cue/reference/remap/functions/is_regex.cue deleted file mode 100644 index 4693aebaa0b5e..0000000000000 --- a/website/cue/reference/remap/functions/is_regex.cue +++ /dev/null @@ -1,42 +0,0 @@ -package metadata - -remap: functions: is_regex: { - category: "Type" - description: """ - Check if `value`'s type is a regex. - """ - - arguments: [ - { - name: "value" - description: #"The value to check if it is a regex."# - required: true - type: ["any"] - }, - ] - internal_failure_reasons: [] - return: { - types: ["boolean"] - rules: [ - #"Returns `true` if `value` is a regex."#, - #"Returns `false` if `value` is anything else."#, - ] - } - - examples: [ - { - title: "Valid regex" - source: """ - is_regex(r'pattern') - """ - return: true - }, - { - title: "Non-matching type" - source: """ - is_regex("a string") - """ - return: false - }, - ] -} diff --git a/website/cue/reference/remap/functions/is_string.cue b/website/cue/reference/remap/functions/is_string.cue deleted file mode 100644 index fd1a3b9c7c8ba..0000000000000 --- a/website/cue/reference/remap/functions/is_string.cue +++ /dev/null @@ -1,42 +0,0 @@ -package metadata - -remap: functions: is_string: { - category: "Type" - description: """ - Check if `value`'s type is a string. 
- """ - - arguments: [ - { - name: "value" - description: #"The value to check if it is a string."# - required: true - type: ["any"] - }, - ] - internal_failure_reasons: [] - return: { - types: ["boolean"] - rules: [ - #"Returns `true` if `value` is a string."#, - #"Returns `false` if `value` is anything else."#, - ] - } - - examples: [ - { - title: "Valid string" - source: """ - is_string("a string") - """ - return: true - }, - { - title: "Non-matching type" - source: """ - is_string([1, 2, 3]) - """ - return: false - }, - ] -} diff --git a/website/cue/reference/remap/functions/is_timestamp.cue b/website/cue/reference/remap/functions/is_timestamp.cue deleted file mode 100644 index 54f24b4d9b474..0000000000000 --- a/website/cue/reference/remap/functions/is_timestamp.cue +++ /dev/null @@ -1,42 +0,0 @@ -package metadata - -remap: functions: is_timestamp: { - category: "Type" - description: """ - Check if `value`'s type is a timestamp. - """ - - arguments: [ - { - name: "value" - description: #"The value to check if it is a timestamp."# - required: true - type: ["any"] - }, - ] - internal_failure_reasons: [] - return: { - types: ["boolean"] - rules: [ - #"Returns `true` if `value` is a timestamp."#, - #"Returns `false` if `value` is anything else."#, - ] - } - - examples: [ - { - title: "Valid timestamp" - source: """ - is_timestamp(t'2021-03-26T16:00:00Z') - """ - return: true - }, - { - title: "Non-matching type" - source: """ - is_timestamp("a string") - """ - return: false - }, - ] -} diff --git a/website/cue/reference/remap/functions/join.cue b/website/cue/reference/remap/functions/join.cue deleted file mode 100644 index fb699d1bcca5e..0000000000000 --- a/website/cue/reference/remap/functions/join.cue +++ /dev/null @@ -1,45 +0,0 @@ -package metadata - -remap: functions: join: { - category: "String" - description: #""" - Joins each string in the `value` array into a single string, with items optionally separated from one another - by a `separator`. 
- """# - - arguments: [ - { - name: "value" - description: "The array of strings to join together." - required: true - type: ["array"] - }, - { - name: "separator" - description: "The string separating each original element when joined." - required: false - type: ["string"] - }, - ] - internal_failure_reasons: [] - return: { - types: ["string"] - } - - examples: [ - { - title: "Join array (no separator)" - source: #""" - join!(["bring", "us", "together"]) - """# - return: "bringustogether" - }, - { - title: "Join array (comma separator)" - source: #""" - join!(["sources", "transforms", "sinks"], separator: ", ") - """# - return: "sources, transforms, sinks" - }, - ] -} diff --git a/website/cue/reference/remap/functions/kebabcase.cue b/website/cue/reference/remap/functions/kebabcase.cue deleted file mode 100644 index 8e113b7d2189c..0000000000000 --- a/website/cue/reference/remap/functions/kebabcase.cue +++ /dev/null @@ -1,43 +0,0 @@ -package metadata - -remap: functions: kebabcase: { - category: "String" - description: """ - Takes the `value` string, and turns it into kebab-case. Optionally, you can - pass in the existing case of the function, or else we will try to figure out the case automatically. - """ - - arguments: [ - { - name: "value" - description: "The string to convert to kebab-case." - required: true - type: ["string"] - }, - { - name: "original_case" - description: "Optional hint on the original case type. 
Must be one of: kebab-case, camelCase, PascalCase, SCREAMING_SNAKE, snake_case" - required: false - type: ["string"] - }, - ] - internal_failure_reasons: [] - return: types: ["string"] - - examples: [ - { - title: "kebab-case a string" - source: #""" - kebabcase("InputString") - """# - return: "input-string" - }, - { - title: "kebab-case a string" - source: #""" - kebabcase("InputString", "PascalCase") - """# - return: "input-string" - }, - ] -} diff --git a/website/cue/reference/remap/functions/keys.cue b/website/cue/reference/remap/functions/keys.cue deleted file mode 100644 index be3c493fa612f..0000000000000 --- a/website/cue/reference/remap/functions/keys.cue +++ /dev/null @@ -1,37 +0,0 @@ -package metadata - -remap: functions: keys: { - category: "Enumerate" - description: #""" - Returns the keys from the object passed into the function. - """# - - arguments: [ - { - name: "value" - description: "The object to extract keys from." - required: true - type: ["object"] - }, - ] - internal_failure_reasons: [] - return: { - types: ["array"] - rules: [ - #"Returns an array of all the keys"#, - ] - } - examples: [ - { - title: "Get keys from the object" - input: log: { - "key1": "val1" - "key2": "val2" - } - source: #""" - keys({"key1": "val1", "key2": "val2"}) - """# - return: ["key1", "key2"] - }, - ] -} diff --git a/website/cue/reference/remap/functions/length.cue b/website/cue/reference/remap/functions/length.cue deleted file mode 100644 index 9f685e66c5697..0000000000000 --- a/website/cue/reference/remap/functions/length.cue +++ /dev/null @@ -1,75 +0,0 @@ -package metadata - -remap: functions: length: { - category: "Enumerate" - // the `return` rules below aren't rendered so we copy them here - description: """ - Returns the length of the `value`. - - * If `value` is an array, returns the number of elements. - * If `value` is an object, returns the number of top-level keys. - * If `value` is a string, returns the number of bytes in the string. 
If - you want the number of characters, see `strlen`. - """ - - arguments: [ - { - name: "value" - description: "The array or object." - required: true - type: ["array", "object", "string"] - }, - ] - internal_failure_reasons: [] - return: { - types: ["integer"] - rules: [ - "If `value` is an array, returns the number of elements.", - "If `value` is an object, returns the number of top-level keys.", - "If `value` is a string, returns the number of bytes in the string.", - ] - } - - examples: [ - { - title: "Length (object)" - source: """ - length({ - "portland": "Trail Blazers", - "seattle": "Supersonics" - }) - """ - return: 2 - }, - { - title: "Length (nested object)" - source: """ - length({ - "home": { - "city": "Portland", - "state": "Oregon" - }, - "name": "Trail Blazers", - "mascot": { - "name": "Blaze the Trail Cat" - } - }) - """ - return: 3 - }, - { - title: "Length (array)" - source: """ - length(["Trail Blazers", "Supersonics", "Grizzlies"]) - """ - return: 3 - }, - { - title: "Length (string)" - source: """ - length("The Planet of the Apes Musical") - """ - return: 30 - }, - ] -} diff --git a/website/cue/reference/remap/functions/log.cue b/website/cue/reference/remap/functions/log.cue deleted file mode 100644 index 26b885198bb6b..0000000000000 --- a/website/cue/reference/remap/functions/log.cue +++ /dev/null @@ -1,66 +0,0 @@ -package metadata - -remap: functions: log: { - category: "Debug" - description: """ - Logs the `value` to [stdout](\(urls.stdout)) at the specified `level`. - """ - - pure: false - - arguments: [ - { - name: "value" - description: "The value to log." - required: true - type: ["any"] - }, - { - name: "level" - description: "The log level." - required: false - type: ["string"] - enum: { - trace: "Log at the `trace` level." - debug: "Log at the `debug` level." - info: "Log at the `info` level." - warn: "Log at the `warn` level." - error: "Log at the `error` level." 
- } - default: "info" - }, - { - name: "rate_limit_secs" - description: #""" - Specifies that the log message is output no more than once per the given number of seconds. - Use a value of `0` to turn rate limiting off. - """# - type: ["integer"] - required: false - default: 1 - }, - ] - internal_failure_reasons: [] - return: types: ["null"] - - examples: [ - { - title: "Log a message" - source: #""" - log("Hello, World!", level: "info", rate_limit_secs: 60) - """# - return: null - }, - { - title: "Log an error" - input: log: field: "not an integer" - source: #""" - _, err = to_int(.field) - if err != null { - log(err, level: "error") - } - """# - return: null - }, - ] -} diff --git a/website/cue/reference/remap/functions/map_keys.cue b/website/cue/reference/remap/functions/map_keys.cue deleted file mode 100644 index b4b835f2dce18..0000000000000 --- a/website/cue/reference/remap/functions/map_keys.cue +++ /dev/null @@ -1,87 +0,0 @@ -package metadata - -remap: functions: map_keys: { - category: "Enumerate" - description: #""" - Map the keys within an object. - - If `recursive` is enabled, the function iterates into nested - objects, using the following rules: - - 1. Iteration starts at the root. - 2. For every nested object type: - - First return the key of the object type itself. - - Then recurse into the object, and loop back to item (1) - in this list. - - Any mutation done on a nested object *before* recursing into - it, are preserved. - 3. For every nested array type: - - First return the key of the array type itself. - - Then find all objects within the array, and apply item (2) - to each individual object. - - The above rules mean that `map_keys` with - `recursive` enabled finds *all* keys in the target, - regardless of whether nested objects are nested inside arrays. - - The function uses the function closure syntax to allow reading - the key for each item in the object. - - The same scoping rules apply to closure blocks as they do for - regular blocks. 
This means that any variable defined in parent scopes - is accessible, and mutations to those variables are preserved, - but any new variables instantiated in the closure block are - unavailable outside of the block. - - See the examples below to learn about the closure syntax. - """# - - arguments: [ - { - name: "value" - description: "The object to iterate." - required: true - type: ["object"] - }, - { - name: "recursive" - description: "Whether to recursively iterate the collection." - required: false - default: false - type: ["boolean"] - }, - ] - internal_failure_reasons: [] - return: { - types: ["object"] - } - examples: [ - { - title: "Upcase keys" - input: log: { - foo: "foo" - bar: "bar" - } - source: #""" - map_keys(.) -> |key| { upcase(key) } - """# - return: {"FOO": "foo", "BAR": "bar"} - }, - { - title: "De-dot keys" - input: log: { - labels: { - "app.kubernetes.io/name": "mysql" - } - } - source: #""" - map_keys(., recursive: true) -> |key| { replace(key, ".", "_") } - """# - return: { - labels: { - "app_kubernetes_io/name": "mysql" - } - } - }, - ] -} diff --git a/website/cue/reference/remap/functions/map_values.cue b/website/cue/reference/remap/functions/map_values.cue deleted file mode 100644 index fdfc8548dca40..0000000000000 --- a/website/cue/reference/remap/functions/map_values.cue +++ /dev/null @@ -1,63 +0,0 @@ -package metadata - -remap: functions: map_values: { - category: "Enumerate" - description: #""" - Map the values within a collection. - - If `recursive` is enabled, the function iterates into nested - collections, using the following rules: - - 1. Iteration starts at the root. - 2. For every nested collection type: - - First return the collection type itself. - - Then recurse into the collection, and loop back to item (1) - in the list - - Any mutation done on a collection *before* recursing into it, - are preserved. - - The function uses the function closure syntax to allow mutating - the value for each item in the collection. 
- - The same scoping rules apply to closure blocks as they do for - regular blocks, meaning, any variable defined in parent scopes - are accessible, and mutations to those variables are preserved, - but any new variables instantiated in the closure block are - unavailable outside of the block. - - Check out the examples below to learn about the closure syntax. - """# - - arguments: [ - { - name: "value" - description: "The object or array to iterate." - required: true - type: ["array", "object"] - }, - { - name: "recursive" - description: "Whether to recursively iterate the collection." - required: false - default: false - type: ["boolean"] - }, - ] - internal_failure_reasons: [] - return: { - types: ["array", "object"] - } - examples: [ - { - title: "Upcase values" - input: log: { - foo: "foo" - bar: "bar" - } - source: #""" - map_values(.) -> |value| { upcase!(value) } - """# - return: {"foo": "FOO", "bar": "BAR"} - }, - ] -} diff --git a/website/cue/reference/remap/functions/match.cue b/website/cue/reference/remap/functions/match.cue deleted file mode 100644 index 4ee82bdb5a574..0000000000000 --- a/website/cue/reference/remap/functions/match.cue +++ /dev/null @@ -1,42 +0,0 @@ -package metadata - -remap: functions: match: { - category: "String" - description: """ - Determines whether the `value` matches the `pattern`. - """ - - arguments: [ - { - name: "value" - description: "The value to match." - required: true - type: ["string"] - }, - { - name: "pattern" - description: "The regular expression pattern to match against." 
- required: true - type: ["regex"] - }, - ] - internal_failure_reasons: [] - return: types: ["boolean"] - - examples: [ - { - title: "Regex match on a string" - source: """ - match("I'm a little teapot", r'teapot') - """ - return: true - }, - { - title: "String does not match the regular expression" - source: """ - match("I'm a little teapot", r'.*balloon') - """ - return: false - }, - ] -} diff --git a/website/cue/reference/remap/functions/match_any.cue b/website/cue/reference/remap/functions/match_any.cue deleted file mode 100644 index 1dc0767d9623b..0000000000000 --- a/website/cue/reference/remap/functions/match_any.cue +++ /dev/null @@ -1,38 +0,0 @@ -package metadata - -remap: functions: match_any: { - category: "String" - description: """ - Determines whether `value` matches any of the given `patterns`. All - patterns are checked in a single pass over the target string, giving this - function a potential performance advantage over the multiple calls - in the `match` function. - """ - - arguments: [ - { - name: "value" - description: "The value to match." - required: true - type: ["string"] - }, - { - name: "patterns" - description: "The array of regular expression patterns to match against." - required: true - type: ["array"] - }, - ] - internal_failure_reasons: [] - return: types: ["boolean"] - - examples: [ - { - title: "Regex match on a string" - source: """ - match_any("I'm a little teapot", [r'frying pan', r'teapot']) - """ - return: true - }, - ] -} diff --git a/website/cue/reference/remap/functions/match_array.cue b/website/cue/reference/remap/functions/match_array.cue deleted file mode 100644 index c45d46c2654f2..0000000000000 --- a/website/cue/reference/remap/functions/match_array.cue +++ /dev/null @@ -1,63 +0,0 @@ -package metadata - -remap: functions: match_array: { - category: "Enumerate" - description: """ - Determines whether the elements in the `value` array matches the `pattern`. 
By default, it checks that at least one element matches, but can be set to determine if all the elements match. - """ - - arguments: [ - { - name: "value" - description: "The array." - required: true - type: ["array"] - }, - { - name: "pattern" - description: "The regular expression pattern to match against." - required: true - type: ["regex"] - }, - { - name: "all" - description: "Whether to match on all elements of `value`." - required: false - default: false - type: ["boolean"] - }, - ] - internal_failure_reasons: [] - return: types: ["boolean"] - - examples: [ - { - title: "Match at least one element" - source: #""" - match_array(["foobar", "bazqux"], r'foo') - """# - return: true - }, - { - title: "Match all elements" - source: #""" - match_array(["foo", "foobar", "barfoo"], r'foo', all: true) - """# - return: true - }, - { - title: "No matches" - source: #""" - match_array(["bazqux", "xyz"], r'foo') - """# - return: false - }, - { - title: "Not all elements match" - source: #""" - match_array(["foo", "foobar", "baz"], r'foo', all: true) - """# - return: false - }, - ] -} diff --git a/website/cue/reference/remap/functions/match_datadog_query.cue b/website/cue/reference/remap/functions/match_datadog_query.cue deleted file mode 100644 index bef981f5792e6..0000000000000 --- a/website/cue/reference/remap/functions/match_datadog_query.cue +++ /dev/null @@ -1,56 +0,0 @@ -package metadata - -remap: functions: match_datadog_query: { - category: "Object" - description: """ - Matches an object against a [Datadog Search Syntax](\(urls.datadog_search_syntax)) query. - """ - - arguments: [ - { - name: "value" - description: "The object." - required: true - type: ["object"] - }, - { - name: "query" - description: "The Datadog Search Syntax query." 
- required: true - type: ["string"] - }, - ] - internal_failure_reasons: [] - return: types: ["boolean"] - - examples: [ - { - title: "OR query" - source: #""" - match_datadog_query({"message": "contains this and that"}, "this OR that") - """# - return: true - }, - { - title: "AND query" - source: #""" - match_datadog_query({"message": "contains only this"}, "this AND that") - """# - return: false - }, - { - title: "Attribute wildcard" - source: #""" - match_datadog_query({"name": "foobar"}, "@name:foo*") - """# - return: true - }, - { - title: "Tag range" - source: #""" - match_datadog_query({"tags": ["a:x", "b:y", "c:z"]}, s'b:["x" TO "z"]') - """# - return: true - }, - ] -} diff --git a/website/cue/reference/remap/functions/md5.cue b/website/cue/reference/remap/functions/md5.cue deleted file mode 100644 index 623b525bc3bfe..0000000000000 --- a/website/cue/reference/remap/functions/md5.cue +++ /dev/null @@ -1,29 +0,0 @@ -package metadata - -remap: functions: md5: { - category: "Cryptography" - description: """ - Calculates an md5 hash of the `value`. - """ - - arguments: [ - { - name: "value" - description: "The string to calculate the hash for." - required: true - type: ["string"] - }, - ] - internal_failure_reasons: [] - return: types: ["string"] - - examples: [ - { - title: "Create md5 hash" - source: #""" - md5("foo") - """# - return: "acbd18db4cc2f85cedef654fccc4a4d8" - }, - ] -} diff --git a/website/cue/reference/remap/functions/merge.cue b/website/cue/reference/remap/functions/merge.cue deleted file mode 100644 index 4aa33b6af856c..0000000000000 --- a/website/cue/reference/remap/functions/merge.cue +++ /dev/null @@ -1,104 +0,0 @@ -package metadata - -remap: functions: merge: { - category: "Object" - description: """ - Merges the `from` object into the `to` object. - """ - - arguments: [ - { - name: "to" - description: "The object to merge into." - required: true - type: ["object"] - }, - { - name: "from" - description: "The object to merge from." 
- required: true - type: ["object"] - }, - { - name: "deep" - description: "A deep merge is performed if `true`, otherwise only top-level fields are merged." - required: false - default: false - type: ["boolean"] - }, - ] - internal_failure_reasons: [] - return: { - types: ["object"] - rules: [ - #"The field from the `from` object is chosen if a key exists in both objects."#, - #""" - Objects are merged recursively if `deep` is specified, a key exists in both objects, and both of those - fields are also objects. - """#, - ] - } - - examples: [ - { - title: "Object merge (shallow)" - source: #""" - merge( - { - "parent1": { - "child1": 1, - "child2": 2 - }, - "parent2": { - "child3": 3 - } - }, - { - "parent1": { - "child2": 4, - "child5": 5 - } - } - ) - """# - return: { - parent1: { - child2: 4 - child5: 5 - } - parent2: child3: 3 - } - }, - { - title: "Object merge (deep)" - source: #""" - merge( - { - "parent1": { - "child1": 1, - "child2": 2 - }, - "parent2": { - "child3": 3 - } - }, - { - "parent1": { - "child2": 4, - "child5": 5 - } - }, - deep: true - ) - """# - return: { - parent1: { - child1: 1 - child2: 4 - child5: 5 - } - parent2: child3: 3 - } - }, - ] -} diff --git a/website/cue/reference/remap/functions/mod.cue b/website/cue/reference/remap/functions/mod.cue deleted file mode 100644 index 8241f50a9b52e..0000000000000 --- a/website/cue/reference/remap/functions/mod.cue +++ /dev/null @@ -1,39 +0,0 @@ -package metadata - -remap: functions: mod: { - category: "Number" - description: """ - Calculates the remainder of `value` divided by `modulus`. - """ - - arguments: [ - { - name: "value" - description: "The value the `modulus` is applied to." - required: true - type: ["integer", "float"] - }, - { - name: "modulus" - description: "The `modulus` value." 
- required: true - type: ["integer", "float"] - }, - ] - internal_failure_reasons: [ - "`value` is not an integer or float.", - "`modulus` is not an integer or float.", - "`modulus` is equal to 0.", - ] - return: types: ["integer", "float"] - - examples: [ - { - title: "Calculate the remainder of two integers" - source: #""" - mod(5, 2) - """# - return: 1 - }, - ] -} diff --git a/website/cue/reference/remap/functions/now.cue b/website/cue/reference/remap/functions/now.cue deleted file mode 100644 index cffe9accb726f..0000000000000 --- a/website/cue/reference/remap/functions/now.cue +++ /dev/null @@ -1,22 +0,0 @@ -package metadata - -remap: functions: now: { - category: "Timestamp" - description: """ - Returns the current timestamp in the UTC timezone with nanosecond precision. - """ - - arguments: [] - internal_failure_reasons: [] - return: types: ["timestamp"] - - examples: [ - { - title: "Generate a current timestamp" - source: #""" - now() - """# - return: "2021-03-04T10:51:15.928937Z" - }, - ] -} diff --git a/website/cue/reference/remap/functions/object.cue b/website/cue/reference/remap/functions/object.cue deleted file mode 100644 index adfc2446538cf..0000000000000 --- a/website/cue/reference/remap/functions/object.cue +++ /dev/null @@ -1,41 +0,0 @@ -package metadata - -remap: functions: object: { - category: "Type" - description: """ - Returns `value` if it is an object, otherwise returns an error. This enables the type checker to guarantee that the - returned value is an object and can be used in any function that expects an object. - """ - - arguments: [ - { - name: "value" - description: "The value to check if it is an object." 
- required: true - type: ["any"] - }, - ] - internal_failure_reasons: [ - "`value` is not an object.", - ] - return: { - types: ["object"] - rules: [ - #"Returns the `value` if it's an object."#, - #"Raises an error if not an object."#, - ] - } - examples: [ - { - title: "Declare an object type" - input: log: value: { - field1: "value1" - field2: "value2" - } - source: #""" - object!(.value) - """# - return: input.log.value - }, - ] -} diff --git a/website/cue/reference/remap/functions/object_from_array.cue b/website/cue/reference/remap/functions/object_from_array.cue deleted file mode 100644 index 1bb29246375ab..0000000000000 --- a/website/cue/reference/remap/functions/object_from_array.cue +++ /dev/null @@ -1,54 +0,0 @@ -package metadata - -remap: functions: object_from_array: { - category: "Object" - description: """ - Iterate over either one array of arrays or a pair of arrays and create an object out of all the key-value pairs contained in them. - With one array of arrays, any entries with no value use `null` instead. - Any keys that are `null` skip the corresponding value. - - If a single parameter is given, it must contain an array of all the input arrays. - """ - - arguments: [ - { - name: "values" - description: "The first array of elements, or the array of input arrays if no other parameter is present." - required: true - type: ["array"] - }, - { - name: "keys" - description: "The second array of elements. If not present, the first parameter must contain all the arrays." 
- required: false - type: ["array"] - }, - ] - internal_failure_reasons: [ - "`values` and `keys` must be arrays.", - "If `keys` is not present, `values` must contain only arrays.", - ] - return: { - types: ["object"] - rules: [ - "`object_from_array` is considered fallible in the following cases: if any of the parameters is not an array; if only the `value` parameter is present and it is not an array of arrays; or if any of the keys are not either a string or `null`.", - ] - } - - examples: [ - { - title: "Create an object from one array" - source: #""" - object_from_array([["one", 1], [null, 2], ["two", 3]]) - """# - return: {"one": 1, "two": 3} - }, - { - title: "Create an object from separate key and value arrays" - source: #""" - object_from_array([1, 2, 3], keys: ["one", null, "two"]) - """# - return: {"one": 1, "two": 3} - }, - ] -} diff --git a/website/cue/reference/remap/functions/parse_apache_log.cue b/website/cue/reference/remap/functions/parse_apache_log.cue deleted file mode 100644 index 8da0fbdf0c0d5..0000000000000 --- a/website/cue/reference/remap/functions/parse_apache_log.cue +++ /dev/null @@ -1,114 +0,0 @@ -package metadata - -remap: functions: parse_apache_log: { - category: "Parse" - description: """ - Parses Apache access and error log lines. Lines can be in [`common`](\(urls.apache_common)), - [`combined`](\(urls.apache_combined)), or the default [`error`](\(urls.apache_error)) format. - """ - notices: [ - """ - Missing information in the log message may be indicated by `-`. These fields are omitted in the result. - """, - ] - - arguments: [ - { - name: "value" - description: "The string to parse." - required: true - type: ["string"] - }, - { - name: "timestamp_format" - description: """ - The [date/time format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) to use for - encoding the timestamp. The time is parsed in local time if the timestamp does not specify a timezone. 
- """ - required: false - default: "%d/%b/%Y:%T %z" - type: ["string"] - }, - { - name: "format" - description: "The format to use for parsing the log." - required: true - enum: { - "common": "Common format" - "combined": "Apache combined format" - "error": "Default Apache error format" - } - type: ["string"] - }, - ] - - internal_failure_reasons: [ - "`value` does not match the specified format.", - "`timestamp_format` is not a valid format string.", - "The timestamp in `value` fails to parse using the provided `timestamp_format`.", - ] - return: types: ["object"] - - examples: [ - { - title: "Parse using Apache log format (common)" - source: #""" - parse_apache_log!("127.0.0.1 bob frank [10/Oct/2000:13:55:36 -0700] \"GET /apache_pb.gif HTTP/1.0\" 200 2326", format: "common") - """# - return: { - host: "127.0.0.1" - identity: "bob" - user: "frank" - timestamp: "2000-10-10T20:55:36Z" - message: "GET /apache_pb.gif HTTP/1.0" - method: "GET" - path: "/apache_pb.gif" - protocol: "HTTP/1.0" - status: 200 - size: 2326 - } - }, - { - title: "Parse using Apache log format (combined)" - source: #""" - parse_apache_log!( - s'127.0.0.1 bob frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326 "http://www.seniorinfomediaries.com/vertical/channels/front-end/bandwidth" "Mozilla/5.0 (X11; Linux i686; rv:5.0) Gecko/1945-10-12 Firefox/37.0"', - "combined", - ) - """# - return: { - host: "127.0.0.1" - identity: "bob" - user: "frank" - timestamp: "2000-10-10T20:55:36Z" - message: "GET /apache_pb.gif HTTP/1.0" - method: "GET" - path: "/apache_pb.gif" - protocol: "HTTP/1.0" - status: 200 - size: 2326 - referrer: "http://www.seniorinfomediaries.com/vertical/channels/front-end/bandwidth" - agent: "Mozilla/5.0 (X11; Linux i686; rv:5.0) Gecko/1945-10-12 Firefox/37.0" - } - }, - { - title: "Parse using Apache log format (error)" - source: #""" - parse_apache_log!( - s'[01/Mar/2021:12:00:19 +0000] [ab:alert] [pid 4803:tid 3814] [client 147.159.108.175:24259] I will 
bypass the haptic COM bandwidth, that should matrix the CSS driver!', - "error" - ) - """# - return: { - client: "147.159.108.175" - message: "I will bypass the haptic COM bandwidth, that should matrix the CSS driver!" - module: "ab" - pid: 4803 - port: 24259 - severity: "alert" - thread: "3814" - timestamp: "2021-03-01T12:00:19Z" - } - }, - ] -} diff --git a/website/cue/reference/remap/functions/parse_aws_alb_log.cue b/website/cue/reference/remap/functions/parse_aws_alb_log.cue deleted file mode 100644 index 21f87d19aa227..0000000000000 --- a/website/cue/reference/remap/functions/parse_aws_alb_log.cue +++ /dev/null @@ -1,116 +0,0 @@ -package metadata - -remap: functions: parse_aws_alb_log: { - category: "Parse" - description: """ - Parses `value` in the [Elastic Load Balancer Access format](\(urls.aws_elb_access_format)). - """ - - arguments: [ - { - name: "value" - description: "Access log of the Application Load Balancer." - required: true - type: ["string"] - }, - { - name: "strict_mode" - description: "When set to `false`, the parser ignores any newly added or trailing fields in AWS ALB logs instead of failing. Defaults to `true` to preserve strict parsing behavior." 
- required: false - type: ["boolean"] - default: true - }, - ] - internal_failure_reasons: [ - "`value` is not a properly formatted AWS ALB log.", - ] - return: types: ["object"] - - examples: [ - { - title: "Parse AWS ALB log" - source: #""" - parse_aws_alb_log!( - "http 2018-11-30T22:23:00.186641Z app/my-loadbalancer/50dc6c495c0c9188 192.168.131.39:2817 - 0.000 0.001 0.000 200 200 34 366 \"GET http://www.example.com:80/ HTTP/1.1\" \"curl/7.46.0\" - - arn:aws:elasticloadbalancing:us-east-2:123456789012:targetgroup/my-targets/73e2d6bc24d8a067 \"Root=1-58337364-23a8c76965a2ef7629b185e3\" \"-\" \"-\" 0 2018-11-30T22:22:48.364000Z \"forward\" \"-\" \"-\" \"-\" \"-\" \"-\" \"-\"" - ) - """# - return: { - type: "http" - timestamp: "2018-11-30T22:23:00.186641Z" - elb: "app/my-loadbalancer/50dc6c495c0c9188" - client_host: "192.168.131.39:2817" - target_host: null - request_processing_time: 0.0 - target_processing_time: 0.001 - response_processing_time: 0.0 - elb_status_code: "200" - target_status_code: "200" - received_bytes: 34 - sent_bytes: 366 - request_method: "GET" - request_url: "http://www.example.com:80/" - request_protocol: "HTTP/1.1" - user_agent: "curl/7.46.0" - ssl_cipher: null - ssl_protocol: null - target_group_arn: "arn:aws:elasticloadbalancing:us-east-2:123456789012:targetgroup/my-targets/73e2d6bc24d8a067" - trace_id: "Root=1-58337364-23a8c76965a2ef7629b185e3" - traceability_id: null - domain_name: null - chosen_cert_arn: null - matched_rule_priority: "0" - request_creation_time: "2018-11-30T22:22:48.364000Z" - actions_executed: "forward" - redirect_url: null - error_reason: null - target_port_list: [] - target_status_code_list: [] - classification: null - classification_reason: null - } - }, - { - title: "Parse AWS ALB log with trailing fields (non-strict mode)" - source: #""" - parse_aws_alb_log!( - "http 2018-11-30T22:23:00.186641Z app/my-loadbalancer/50dc6c495c0c9188 192.168.131.39:2817 - 0.000 0.001 0.000 200 200 34 366 \"GET 
http://www.example.com:80/ HTTP/1.1\" \"curl/7.46.0\" - - arn:aws:elasticloadbalancing:us-east-2:123456789012:targetgroup/my-targets/73e2d6bc24d8a067 \"Root=1-58337364-23a8c76965a2ef7629b185e3\" \"-\" \"-\" 0 2018-11-30T22:22:48.364000Z \"forward\" \"-\" \"-\" \"-\" \"-\" \"-\" \"-\" TID_12345 \"-\" \"-\" \"-\"", - strict_mode: false - ) - """# - return: { - type: "http" - timestamp: "2018-11-30T22:23:00.186641Z" - elb: "app/my-loadbalancer/50dc6c495c0c9188" - client_host: "192.168.131.39:2817" - target_host: null - request_processing_time: 0.0 - target_processing_time: 0.001 - response_processing_time: 0.0 - elb_status_code: "200" - target_status_code: "200" - received_bytes: 34 - sent_bytes: 366 - request_method: "GET" - request_url: "http://www.example.com:80/" - request_protocol: "HTTP/1.1" - user_agent: "curl/7.46.0" - ssl_cipher: null - ssl_protocol: null - target_group_arn: "arn:aws:elasticloadbalancing:us-east-2:123456789012:targetgroup/my-targets/73e2d6bc24d8a067" - trace_id: "Root=1-58337364-23a8c76965a2ef7629b185e3" - traceability_id: "TID_12345" - domain_name: null - chosen_cert_arn: null - matched_rule_priority: "0" - request_creation_time: "2018-11-30T22:22:48.364000Z" - actions_executed: "forward" - redirect_url: null - error_reason: null - target_port_list: [] - target_status_code_list: [] - classification: null - classification_reason: null - } - }, - ] -} diff --git a/website/cue/reference/remap/functions/parse_aws_cloudwatch_log_subscription_message.cue b/website/cue/reference/remap/functions/parse_aws_cloudwatch_log_subscription_message.cue deleted file mode 100644 index 4a24c2ec7cd23..0000000000000 --- a/website/cue/reference/remap/functions/parse_aws_cloudwatch_log_subscription_message.cue +++ /dev/null @@ -1,61 +0,0 @@ -package metadata - -remap: functions: parse_aws_cloudwatch_log_subscription_message: { - category: "Parse" - description: """ - Parses AWS CloudWatch Logs events (configured through AWS Cloudwatch subscriptions) from the - 
`aws_kinesis_firehose` source. - """ - - arguments: [ - { - name: "value" - description: "The string representation of the message to parse." - required: true - type: ["string"] - }, - ] - internal_failure_reasons: [ - "`value` is not a properly formatted AWS CloudWatch Log subscription message.", - ] - return: types: ["object"] - - examples: [ - { - title: "Parse AWS Cloudwatch Log subscription message" - input: log: message: #""" - { - "messageType": "DATA_MESSAGE", - "owner": "111111111111", - "logGroup": "test", - "logStream": "test", - "subscriptionFilters": [ - "Destination" - ], - "logEvents": [ - { - "id": "35683658089614582423604394983260738922885519999578275840", - "timestamp": 1600110569039, - "message": "{\"bytes\":26780,\"datetime\":\"14/Sep/2020:11:45:41 -0400\",\"host\":\"157.130.216.193\",\"method\":\"PUT\",\"protocol\":\"HTTP/1.0\",\"referer\":\"https://www.principalcross-platform.io/markets/ubiquitous\",\"request\":\"/expedite/convergence\",\"source_type\":\"stdin\",\"status\":301,\"user-identifier\":\"-\"}" - } - ] - } - """# - source: #""" - parse_aws_cloudwatch_log_subscription_message!(.message) - """# - return: { - owner: "111111111111" - message_type: "DATA_MESSAGE" - log_group: "test" - log_stream: "test" - subscription_filters: ["Destination"] - log_events: [{ - id: "35683658089614582423604394983260738922885519999578275840" - message: "{\"bytes\":26780,\"datetime\":\"14/Sep/2020:11:45:41 -0400\",\"host\":\"157.130.216.193\",\"method\":\"PUT\",\"protocol\":\"HTTP/1.0\",\"referer\":\"https://www.principalcross-platform.io/markets/ubiquitous\",\"request\":\"/expedite/convergence\",\"source_type\":\"stdin\",\"status\":301,\"user-identifier\":\"-\"}" - timestamp: "2020-09-14T19:09:29.039Z" - }] - } - }, - ] -} diff --git a/website/cue/reference/remap/functions/parse_aws_vpc_flow_log.cue b/website/cue/reference/remap/functions/parse_aws_vpc_flow_log.cue deleted file mode 100644 index d64042b9905a4..0000000000000 --- 
a/website/cue/reference/remap/functions/parse_aws_vpc_flow_log.cue +++ /dev/null @@ -1,107 +0,0 @@ -package metadata - -remap: functions: parse_aws_vpc_flow_log: { - category: "Parse" - description: """ - Parses `value` in the [VPC Flow Logs format](\(urls.aws_vpc_flow_logs)). - """ - - arguments: [ - { - name: "value" - description: "VPC Flow Log." - required: true - type: ["string"] - }, - { - name: "format" - description: "VPC Flow Log format." - required: false - type: ["string"] - }, - ] - internal_failure_reasons: [ - "`value` is not a properly formatted AWS VPC Flow log.", - ] - return: types: ["object"] - - examples: [ - { - title: "Parse AWS VPC Flow log (default format)" - source: #""" - parse_aws_vpc_flow_log!("2 123456789010 eni-1235b8ca123456789 - - - - - - - 1431280876 1431280934 - NODATA") - """# - return: { - "version": 2 - "account_id": "123456789010" - "interface_id": "eni-1235b8ca123456789" - "srcaddr": null - "dstaddr": null - "srcport": null - "dstport": null - "protocol": null - "packets": null - "bytes": null - "start": 1431280876 - "end": 1431280934 - "action": null - "log_status": "NODATA" - } - }, - { - title: "Parse AWS VPC Flow log (custom format)" - source: #""" - parse_aws_vpc_flow_log!( - "- eni-1235b8ca123456789 10.0.1.5 10.0.0.220 10.0.1.5 203.0.113.5", - "instance_id interface_id srcaddr dstaddr pkt_srcaddr pkt_dstaddr" - ) - """# - return: { - "instance_id": null - "interface_id": "eni-1235b8ca123456789" - "srcaddr": "10.0.1.5" - "dstaddr": "10.0.0.220" - "pkt_srcaddr": "10.0.1.5" - "pkt_dstaddr": "203.0.113.5" - } - }, - { - title: "Parse AWS VPC Flow log including v5 fields" - source: #""" - parse_aws_vpc_flow_log!("5 52.95.128.179 10.0.0.71 80 34210 6 1616729292 1616729349 IPv4 14 15044 123456789012 vpc-abcdefab012345678 subnet-aaaaaaaa012345678 i-0c50d5961bcb2d47b eni-1235b8ca123456789 ap-southeast-2 apse2-az3 - - ACCEPT 19 52.95.128.179 10.0.0.71 S3 - - ingress OK", - format: "version srcaddr dstaddr srcport dstport protocol 
start end type packets bytes account_id vpc_id subnet_id instance_id interface_id region az_id sublocation_type sublocation_id action tcp_flags pkt_srcaddr pkt_dstaddr pkt_src_aws_service pkt_dst_aws_service traffic_path flow_direction log_status") - """# - return: { - "account_id": "123456789012" - "action": "ACCEPT" - "az_id": "apse2-az3" - "bytes": 15044 - "dstaddr": "10.0.0.71" - "dstport": 34210 - "end": 1616729349 - "flow_direction": "ingress" - "instance_id": "i-0c50d5961bcb2d47b" - "interface_id": "eni-1235b8ca123456789" - "log_status": "OK" - "packets": 14 - "pkt_dst_aws_service": null - "pkt_dstaddr": "10.0.0.71" - "pkt_src_aws_service": "S3" - "pkt_srcaddr": "52.95.128.179" - "protocol": 6 - "region": "ap-southeast-2" - "srcaddr": "52.95.128.179" - "srcport": 80 - "start": 1616729292 - "sublocation_id": null - "sublocation_type": null - "subnet_id": "subnet-aaaaaaaa012345678" - "tcp_flags": 19 - "traffic_path": null - "type": "IPv4" - "version": 5 - "vpc_id": "vpc-abcdefab012345678" - } - }, - ] -} diff --git a/website/cue/reference/remap/functions/parse_bytes.cue b/website/cue/reference/remap/functions/parse_bytes.cue deleted file mode 100644 index 290c8e752e756..0000000000000 --- a/website/cue/reference/remap/functions/parse_bytes.cue +++ /dev/null @@ -1,73 +0,0 @@ -package metadata - -remap: functions: parse_bytes: { - category: "Parse" - description: """ - Parses the `value` into a human-readable bytes format specified by `unit` and `base`. - """ - - arguments: [ - { - name: "value" - description: "The string of the duration with either binary or SI unit." - required: true - type: ["string"] - }, - { - name: "unit" - description: "The output units for the byte." 
- required: true - type: ["string"] - enum: { - B: "Bytes" - kiB: "Kilobytes (1024 bytes)" - MiB: "Megabytes (1024 ** 2 bytes)" - GiB: "Gigabytes (1024 ** 3 bytes)" - TiB: "Terabytes (1024 gigabytes)" - PiB: "Petabytes (1024 ** 2 gigabytes)" - EiB: "Exabytes (1024 ** 3 gigabytes)" - kB: "Kilobytes (1 thousand bytes in SI)" - MB: "Megabytes (1 million bytes in SI)" - GB: "Gigabytes (1 billion bytes in SI)" - TB: "Terabytes (1 thousand gigabytes in SI)" - PB: "Petabytes (1 million gigabytes in SI)" - EB: "Exabytes (1 billion gigabytes in SI)" - } - }, - { - name: "base" - description: "The base for the byte, either 2 or 10." - required: false - type: ["string"] - default: 2 - }, - ] - internal_failure_reasons: [ - "`value` is not a properly formatted bytes.", - ] - return: types: ["float"] - - examples: [ - { - title: "Parse bytes (kilobytes)" - source: #""" - parse_bytes!("1024KiB", unit: "MiB") - """# - return: 1.0 - }, - { - title: "Parse bytes in SI unit (terabytes)" - source: #""" - parse_bytes!("4TB", unit: "MB", base: "10") - """# - return: 4000000.0 - }, - { - title: "Parse bytes in ambiguous unit (gigabytes)" - source: #""" - parse_bytes!("1GB", unit: "B", base: "2") - """# - return: 1073741824.0 - }, - ] -} diff --git a/website/cue/reference/remap/functions/parse_cbor.cue b/website/cue/reference/remap/functions/parse_cbor.cue deleted file mode 100644 index 5cf99d1ae0e58..0000000000000 --- a/website/cue/reference/remap/functions/parse_cbor.cue +++ /dev/null @@ -1,36 +0,0 @@ -package metadata - -remap: functions: parse_cbor: { - category: "Parse" - description: """ - Parses the `value` as [CBOR](\(urls.cbor)). - """ - notices: [ - """ - Only CBOR types are returned. - """, - ] - - arguments: [ - { - name: "value" - description: "The CBOR payload to parse." 
- required: true - type: ["string"] - }, - ] - internal_failure_reasons: [ - "`value` is not a valid CBOR-formatted payload.", - ] - return: types: ["boolean", "integer", "float", "string", "object", "array", "null"] - - examples: [ - { - title: "Parse CBOR" - source: #""" - parse_cbor!(decode_base64!("oWVmaWVsZGV2YWx1ZQ==")) - """# - return: field: "value" - }, - ] -} diff --git a/website/cue/reference/remap/functions/parse_cef.cue b/website/cue/reference/remap/functions/parse_cef.cue deleted file mode 100644 index 5c1d7a415692e..0000000000000 --- a/website/cue/reference/remap/functions/parse_cef.cue +++ /dev/null @@ -1,107 +0,0 @@ -package metadata - -remap: functions: parse_cef: { - category: "Parse" - description: """ - Parses the `value` in CEF (Common Event Format) format. Ignores everything up to CEF header. Empty values are returned as empty strings. Surrounding quotes are removed from values. - """ - notices: [ - """ - All values are returned as strings. We recommend manually coercing values to desired types as you see fit. - """, - ] - - arguments: [ - { - name: "value" - description: "The string to parse." - required: true - type: ["string"] - }, - { - name: "translate_custom_fields" - description: "Toggles translation of custom field pairs to `key:value`." 
- required: false - type: ["boolean"] - }, - ] - internal_failure_reasons: [ - "`value` is not a properly formatted CEF string.", - ] - return: types: ["object"] - - examples: [ - { - title: "Parse output generated by PTA" - source: #""" - parse_cef!( - "CEF:0|CyberArk|PTA|12.6|1|Suspected credentials theft|8|suser=mike2@prod1.domain.com shost=prod1.domain.com src=1.1.1.1 duser=andy@dev1.domain.com dhost=dev1.domain.com dst=2.2.2.2 cs1Label=ExtraData cs1=None cs2Label=EventID cs2=52b06812ec3500ed864c461e deviceCustomDate1Label=detectionDate deviceCustomDate1=1388577900000 cs3Label=PTAlink cs3=https://1.1.1.1/incidents/52b06812ec3500ed864c461e cs4Label=ExternalLink cs4=None" - ) - """# - return: { - "cefVersion": "0" - "deviceVendor": "CyberArk" - "deviceProduct": "PTA" - "deviceVersion": "12.6" - "deviceEventClassId": "1" - "name": "Suspected credentials theft" - "severity": "8" - "suser": "mike2@prod1.domain.com" - "shost": "prod1.domain.com" - "src": "1.1.1.1" - "duser": "andy@dev1.domain.com" - "dhost": "dev1.domain.com" - "dst": "2.2.2.2" - "cs1Label": "ExtraData" - "cs1": "None" - "cs2Label": "EventID" - "cs2": "52b06812ec3500ed864c461e" - "deviceCustomDate1Label": "detectionDate" - "deviceCustomDate1": "1388577900000" - "cs3Label": "PTAlink" - "cs3": "https://1.1.1.1/incidents/52b06812ec3500ed864c461e" - "cs4Label": "ExternalLink" - "cs4": "None" - } - }, - { - title: "Ignore syslog header" - source: #""" - parse_cef!( - "Sep 29 08:26:10 host CEF:1|Security|threatmanager|1.0|100|worm successfully stopped|10|src=10.0.0.1 dst=2.1.2.2 spt=1232" - ) - """# - return: { - "cefVersion": "1" - "deviceVendor": "Security" - "deviceProduct": "threatmanager" - "deviceVersion": "1.0" - "deviceEventClassId": "100" - "name": "worm successfully stopped" - "severity": "10" - "src": "10.0.0.1" - "dst": "2.1.2.2" - "spt": "1232" - } - }, - { - title: "Translate custom fields" - source: #""" - parse_cef!( - "CEF:0|Dev|firewall|2.2|1|Connection 
denied|5|c6a1=2345:0425:2CA1:0000:0000:0567:5673:23b5 c6a1Label=Device IPv6 Address", - translate_custom_fields: true - ) - """# - return: { - "cefVersion": "0" - "deviceVendor": "Dev" - "deviceProduct": "firewall" - "deviceVersion": "2.2" - "deviceEventClassId": "1" - "name": "Connection denied" - "severity": "5" - "Device IPv6 Address": "2345:0425:2CA1:0000:0000:0567:5673:23b5" - } - }, - ] -} diff --git a/website/cue/reference/remap/functions/parse_common_log.cue b/website/cue/reference/remap/functions/parse_common_log.cue deleted file mode 100644 index 29acc391c2a78..0000000000000 --- a/website/cue/reference/remap/functions/parse_common_log.cue +++ /dev/null @@ -1,80 +0,0 @@ -package metadata - -remap: functions: parse_common_log: { - category: "Parse" - description: """ - Parses the `value` using the [Common Log Format](\(urls.apache_common)) (CLF). - """ - notices: [ - """ - Missing information in the log message may be indicated by `-`. These fields are omitted in the result. - """, - ] - - arguments: [ - { - name: "value" - description: "The string to parse." - required: true - type: ["string"] - }, - { - name: "timestamp_format" - description: """ - The [date/time format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) to use for - encoding the timestamp. 
- """ - required: false - default: "%d/%b/%Y:%T %z" - type: ["string"] - }, - ] - internal_failure_reasons: [ - "`value` does not match the Common Log Format.", - "`timestamp_format` is not a valid format string.", - "The timestamp in `value` fails to parse using the provided `timestamp_format`.", - ] - return: types: ["object"] - - examples: [ - { - title: "Parse using Common Log Format (with default timestamp format)" - source: #""" - parse_common_log!("127.0.0.1 bob frank [10/Oct/2000:13:55:36 -0700] \"GET /apache_pb.gif HTTP/1.0\" 200 2326") - """# - return: { - host: "127.0.0.1" - identity: "bob" - user: "frank" - timestamp: "2000-10-10T20:55:36Z" - message: "GET /apache_pb.gif HTTP/1.0" - method: "GET" - path: "/apache_pb.gif" - protocol: "HTTP/1.0" - status: 200 - size: 2326 - } - }, - { - title: "Parse using Common Log Format (with custom timestamp format)" - source: #""" - parse_common_log!( - "127.0.0.1 bob frank [2000-10-10T20:55:36Z] \"GET /apache_pb.gif HTTP/1.0\" 200 2326", - "%+" - ) - """# - return: { - host: "127.0.0.1" - identity: "bob" - user: "frank" - timestamp: "2000-10-10T20:55:36Z" - message: "GET /apache_pb.gif HTTP/1.0" - method: "GET" - path: "/apache_pb.gif" - protocol: "HTTP/1.0" - status: 200 - size: 2326 - } - }, - ] -} diff --git a/website/cue/reference/remap/functions/parse_csv.cue b/website/cue/reference/remap/functions/parse_csv.cue deleted file mode 100644 index fc1f01f2517c1..0000000000000 --- a/website/cue/reference/remap/functions/parse_csv.cue +++ /dev/null @@ -1,51 +0,0 @@ -package metadata - -remap: functions: parse_csv: { - category: "Parse" - description: #""" - Parses a single CSV formatted row. Only the first row is parsed in case of multiline input value. - """# - notices: [ - """ - All values are returned as strings. We recommend manually coercing values to desired types as you see fit. - """, - ] - - arguments: [ - { - name: "value" - description: "The string to parse." 
- required: true - type: ["string"] - }, - { - name: "delimiter" - description: "The field delimiter to use when parsing. Must be a single-byte utf8 character." - required: false - default: "," - type: ["string"] - }, - ] - internal_failure_reasons: [ - "The delimiter must be a single-byte UTF-8 character.", - "`value` is not a valid CSV string.", - ] - return: types: ["array"] - - examples: [ - { - title: "Parse a single CSV formatted row" - source: #""" - parse_csv!("foo,bar,\"foo \"\", bar\"") - """# - return: ["foo", "bar", #"foo ", bar"#] - }, - { - title: "Parse a single CSV formatted row with custom delimiter" - source: #""" - parse_csv!("foo bar", delimiter: " ") - """# - return: ["foo", "bar"] - }, - ] -} diff --git a/website/cue/reference/remap/functions/parse_dnstap.cue b/website/cue/reference/remap/functions/parse_dnstap.cue deleted file mode 100644 index de91822af17d7..0000000000000 --- a/website/cue/reference/remap/functions/parse_dnstap.cue +++ /dev/null @@ -1,142 +0,0 @@ -package metadata - -remap: functions: parse_dnstap: { - category: "Parse" - description: """ - Parses the `value` as base64 encoded DNSTAP data. - """ - notices: [] - - arguments: [ - { - name: "value" - description: "The base64 encoded representation of the DNSTAP data to parse." - required: true - type: ["string"] - }, - { - name: "lowercase_hostnames" - description: """ - Whether to turn all hostnames found in resulting data lowercase, for consistency. 
- """ - required: false - default: false - type: ["boolean"] - }, - ] - internal_failure_reasons: [ - "`value` is not a valid base64 encoded string.", - "dnstap parsing failed for `value`", - ] - return: types: ["object"] - - examples: [ - { - title: "Parse dnstap query message" - source: #""" - parse_dnstap!("ChVqYW1lcy1WaXJ0dWFsLU1hY2hpbmUSC0JJTkQgOS4xNi4zGgBy5wEIAxACGAEiEAAAAAAAAAAAAAAAAAAAAAAqECABBQJwlAAAAAAAAAAAADAw8+0CODVA7+zq9wVNMU3WNlI2kwIAAAABAAAAAAABCWZhY2Vib29rMQNjb20AAAEAAQAAKQIAAACAAAAMAAoACOxjCAG9zVgzWgUDY29tAGAAbQAAAAByZLM4AAAAAQAAAAAAAQJoNQdleGFtcGxlA2NvbQAABgABAAApBNABAUAAADkADwA1AAlubyBTRVAgbWF0Y2hpbmcgdGhlIERTIGZvdW5kIGZvciBkbnNzZWMtZmFpbGVkLm9yZy54AQ==") - """# - return: { - "dataType": "Message" - "dataTypeId": 1 - "extraInfo": "" - "messageType": "ResolverQuery" - "messageTypeId": 3 - "queryZone": "com." - "requestData": { - "fullRcode": 0 - "header": { - "aa": false - "ad": false - "anCount": 0 - "arCount": 1 - "cd": false - "id": 37634 - "nsCount": 0 - "opcode": 0 - "qdCount": 1 - "qr": 0 - "ra": false - "rcode": 0 - "rd": false - "tc": false - } - "opt": { - "do": true - "ednsVersion": 0 - "extendedRcode": 0 - "options": [ - { - "optCode": 10 - "optName": "Cookie" - "optValue": "7GMIAb3NWDM=" - }, - ] - "udpPayloadSize": 512 - } - "question": [ - { - "class": "IN" - "domainName": "facebook1.com." - "questionType": "A" - "questionTypeId": 1 - }, - ] - "rcodeName": "NoError" - } - "responseData": { - "fullRcode": 16 - "header": { - "aa": false - "ad": false - "anCount": 0 - "arCount": 1 - "cd": false - "id": 45880 - "nsCount": 0 - "opcode": 0 - "qdCount": 1 - "qr": 0 - "ra": false - "rcode": 16 - "rd": false - "tc": false - } - "opt": { - "do": false - "ednsVersion": 1 - "extendedRcode": 1 - "ede": [ - { - "extraText": "no SEP matching the DS found for dnssec-failed.org." - "infoCode": 9 - "purpose": "DNSKEY Missing" - }, - ] - "udpPayloadSize": 1232 - } - "question": [ - { - "class": "IN" - "domainName": "h5.example.com." 
- "questionType": "SOA" - "questionTypeId": 6 - }, - ] - "rcodeName": "BADVERS" - } - "responseAddress": "2001:502:7094::30" - "responsePort": 53 - "serverId": "james-Virtual-Machine" - "serverVersion": "BIND 9.16.3" - "socketFamily": "INET6" - "socketProtocol": "UDP" - "sourceAddress": "::" - "sourcePort": 46835 - "time": 1_593_489_007_920_014_129 - "timePrecision": "ns" - "timestamp": "2020-06-30T03:50:07.920014129Z" - } - }, - ] -} diff --git a/website/cue/reference/remap/functions/parse_duration.cue b/website/cue/reference/remap/functions/parse_duration.cue deleted file mode 100644 index 9eb82dacc97ae..0000000000000 --- a/website/cue/reference/remap/functions/parse_duration.cue +++ /dev/null @@ -1,56 +0,0 @@ -package metadata - -remap: functions: parse_duration: { - category: "Parse" - description: """ - Parses the `value` into a human-readable duration format specified by `unit`. - """ - - arguments: [ - { - name: "value" - description: "The string of the duration." - required: true - type: ["string"] - }, - { - name: "unit" - description: "The output units for the duration." 
- required: true - type: ["string"] - enum: { - ns: "Nanoseconds (1 billion nanoseconds in a second)" - us: "Microseconds (1 million microseconds in a second)" - µs: "Microseconds (1 million microseconds in a second)" - ms: "Milliseconds (1 thousand microseconds in a second)" - cs: "Centiseconds (100 centiseconds in a second)" - ds: "Deciseconds (10 deciseconds in a second)" - s: "Seconds" - m: "Minutes (60 seconds in a minute)" - h: "Hours (60 minutes in an hour)" - d: "Days (24 hours in a day)" - } - }, - ] - internal_failure_reasons: [ - "`value` is not a properly formatted duration.", - ] - return: types: ["float"] - - examples: [ - { - title: "Parse duration (milliseconds)" - source: #""" - parse_duration!("1005ms", unit: "s") - """# - return: 1.005 - }, - { - title: "Parse multiple durations (seconds & milliseconds)" - source: #""" - parse_duration!("1s 1ms", unit: "ms") - """# - return: 1001.0 - }, - ] -} diff --git a/website/cue/reference/remap/functions/parse_etld.cue b/website/cue/reference/remap/functions/parse_etld.cue deleted file mode 100644 index 781a1e845ab67..0000000000000 --- a/website/cue/reference/remap/functions/parse_etld.cue +++ /dev/null @@ -1,90 +0,0 @@ -package metadata - -remap: functions: parse_etld: { - category: "Parse" - description: """ - Parses the [eTLD](\(urls.etld)) from `value` representing domain name. - """ - - arguments: [ - { - name: "value" - description: "The domain string." - required: true - type: ["string"] - }, - { - name: "plus_parts" - description: """ - Can be provided to get additional parts of the domain name. When 1 is passed, - eTLD+1 will be returned, which represents a domain registrable by a single - organization. Higher numbers will return subdomains. - """ - required: false - type: ["integer"] - default: false - }, - { - name: "psl" - description: """ - Can be provided to use a different public suffix list. - - By default, https://publicsuffix.org/list/public_suffix_list.dat is used. 
- """ - required: false - type: ["string"] - default: false - }, - ] - internal_failure_reasons: [ - "unable to determine eTLD for `value`", - ] - return: types: ["object"] - - examples: [ - { - title: "Parse eTLD" - source: #""" - parse_etld!("sub.sussex.ac.uk") - """# - return: { - etld: "ac.uk" - etld_plus: "ac.uk" - known_suffix: true - } - }, - { - title: "Parse eTLD+1" - source: #""" - parse_etld!("sub.sussex.ac.uk", plus_parts: 1) - """# - return: { - etld: "ac.uk" - etld_plus: "sussex.ac.uk" - known_suffix: true - } - }, - { - title: "Parse eTLD with unknown suffix" - source: #""" - parse_etld!("vector.acmecorp") - """# - return: { - etld: "acmecorp" - etld_plus: "acmecorp" - known_suffix: false - } - }, - { - title: "Parse eTLD with custom PSL" - source: #""" - parse_etld!("vector.acmecorp", psl: "resources/public_suffix_list.dat") - """# - return: { - etld: "acmecorp" - etld_plus: "acmecorp" - known_suffix: false - } - }, - ] -} diff --git a/website/cue/reference/remap/functions/parse_float.cue b/website/cue/reference/remap/functions/parse_float.cue deleted file mode 100644 index ebf6b9394b9c1..0000000000000 --- a/website/cue/reference/remap/functions/parse_float.cue +++ /dev/null @@ -1,40 +0,0 @@ -package metadata - -remap: functions: parse_float: { - category: "String" - description: """ - Parses the string `value` representing a floating point number in base 10 to a float. - """ - - arguments: [ - { - name: "value" - description: "The string to parse." 
- required: true - type: ["string"] - }, - ] - internal_failure_reasons: [ - "`value` is not a string.", - ] - return: types: ["float"] - - examples: [ - { - title: "Parse negative integer" - source: #"parse_float!("-42")"# - return: -42.0 - }, - { - title: "Parse negative integer" - source: #"parse_float!("42.38")"# - return: 42.38 - }, - { - title: "Scientific notation" - source: #"parse_float!("2.5e3")"# - return: 2500.0 - }, - ] - -} diff --git a/website/cue/reference/remap/functions/parse_glog.cue b/website/cue/reference/remap/functions/parse_glog.cue deleted file mode 100644 index 3fc76b44ca2b5..0000000000000 --- a/website/cue/reference/remap/functions/parse_glog.cue +++ /dev/null @@ -1,36 +0,0 @@ -package metadata - -remap: functions: parse_glog: { - category: "Parse" - description: """ - Parses the `value` using the [glog (Google Logging Library)](\(urls.glog)) format. - """ - arguments: [ - { - name: "value" - description: "The string to parse." - required: true - type: ["string"] - }, - ] - internal_failure_reasons: [ - "`value` does not match the `glog` format.", - ] - return: types: ["object"] - examples: [ - { - title: "Parse using glog" - source: #""" - parse_glog!("I20210131 14:48:54.411655 15520 main.c++:9] Hello world!") - """# - return: { - level: "info" - timestamp: "2021-01-31T14:48:54.411655Z" - id: 15520 - file: "main.c++" - line: 9 - message: "Hello world!" - } - }, - ] -} diff --git a/website/cue/reference/remap/functions/parse_grok.cue b/website/cue/reference/remap/functions/parse_grok.cue deleted file mode 100644 index c207ae4f829b2..0000000000000 --- a/website/cue/reference/remap/functions/parse_grok.cue +++ /dev/null @@ -1,51 +0,0 @@ -package metadata - -remap: functions: parse_grok: { - category: "Parse" - description: """ - Parses the `value` using the [`grok`](\(urls.grok)) format. All patterns [listed here](\(urls.grok_patterns)) - are supported. 
- """ - notices: [ - """ - We recommend using community-maintained Grok patterns when possible, as they're more likely to be properly - vetted and improved over time than bespoke patterns. - """, - ] - - arguments: [ - { - name: "value" - description: "The string to parse." - required: true - type: ["string"] - }, - { - name: "pattern" - description: "The [Grok pattern](https://github.com/daschl/grok/tree/master/patterns)." - required: true - type: ["string"] - }, - ] - internal_failure_reasons: [ - "`value` fails to parse using the provided `pattern`.", - ] - return: types: ["object"] - - examples: [ - { - title: "Parse using Grok" - source: #""" - parse_grok!( - "2020-10-02T23:22:12.223222Z info Hello world", - "%{TIMESTAMP_ISO8601:timestamp} %{LOGLEVEL:level} %{GREEDYDATA:message}" - ) - """# - return: { - timestamp: "2020-10-02T23:22:12.223222Z" - level: "info" - message: "Hello world" - } - }, - ] -} diff --git a/website/cue/reference/remap/functions/parse_groks.cue b/website/cue/reference/remap/functions/parse_groks.cue deleted file mode 100644 index b3ca8625a7679..0000000000000 --- a/website/cue/reference/remap/functions/parse_groks.cue +++ /dev/null @@ -1,93 +0,0 @@ -package metadata - -remap: functions: parse_groks: { - category: "Parse" - description: """ - Parses the `value` using multiple [`grok`](\(urls.grok)) patterns. All patterns [listed here](\(urls.grok_patterns)) - are supported. - """ - notices: [ - """ - We recommend using community-maintained Grok patterns when possible, as they're more likely to be properly - vetted and improved over time than bespoke patterns. - """, - ] - - arguments: [ - { - name: "value" - description: "The string to parse." - required: true - type: ["string"] - }, - { - name: "patterns" - description: "The [Grok patterns](https://github.com/daschl/grok/tree/master/patterns), which are tried in order until the first match." 
- required: true - type: ["array"] - }, - { - name: "aliases" - description: "The shared set of grok aliases that can be referenced in the patterns to simplify them." - required: false - default: true - type: ["object"] - }, - { - name: "alias_sources" - description: "Path to the file containing aliases in a JSON format." - required: false - type: ["string"] - }, - ] - internal_failure_reasons: [ - "`value` fails to parse using the provided `pattern`.", - "`patterns` is not an array.", - "`aliases` is not an object.", - "`alias_sources` is not a string or doesn't point to a valid file.", - ] - return: types: ["object"] - - examples: [ - { - title: "Parse using multiple Grok patterns" - source: #""" - parse_groks!( - "2020-10-02T23:22:12.223222Z info Hello world", - patterns: [ - "%{common_prefix} %{_status} %{_message}", - "%{common_prefix} %{_message}", - ], - aliases: { - "common_prefix": "%{_timestamp} %{_loglevel}", - "_timestamp": "%{TIMESTAMP_ISO8601:timestamp}", - "_loglevel": "%{LOGLEVEL:level}", - "_status": "%{POSINT:status}", - "_message": "%{GREEDYDATA:message}" - } - ) - """# - return: { - timestamp: "2020-10-02T23:22:12.223222Z" - level: "info" - message: "Hello world" - } - }, - { - title: "Parse using aliases from file" - source: #""" - parse_groks!( - "username=foo", - patterns: [ "%{PATTERN_A}" ], - alias_sources: [ "path/to/aliases.json" ] - ) - # aliases.json contents: - # { - # "PATTERN_A": "%{PATTERN_B}", - # "PATTERN_B": "username=%{USERNAME:username}" - # } - """# - skip_test: true - }, - ] -} diff --git a/website/cue/reference/remap/functions/parse_influxdb.cue b/website/cue/reference/remap/functions/parse_influxdb.cue deleted file mode 100644 index b220fa0180c5e..0000000000000 --- a/website/cue/reference/remap/functions/parse_influxdb.cue +++ /dev/null @@ -1,109 +0,0 @@ -package metadata - -remap: functions: parse_influxdb: { - category: "Parse" - description: """ - Parses the `value` as an [InfluxDB line 
protocol](https://docs.influxdata.com/influxdb/cloud/reference/syntax/line-protocol/) - string, producing a list of Vector-compatible metrics. - """ - notices: [ - """ - This function will return a log event with the shape of a Vector-compatible metric, but not a metric event itself. - You will likely want to pipe the output of this function through a `log_to_metric` transform with the option `all_metrics` - set to `true` to convert the metric-shaped log events to metric events so _real_ metrics are produced. - """, - """ - The only metric type that is produced is a `gauge`. Each metric name is prefixed with the `measurement` field, followed - by an underscore (`_`), and then the `field key` field. - """, - """ - `string` is the only type that is not supported as a field value, - due to limitations of Vector's metric model. - """, - ] - arguments: [ - { - name: "value" - description: "The string representation of the InfluxDB line protocol to parse." - required: true - type: ["string"] - }, - ] - internal_failure_reasons: [ - "`value` is not a valid InfluxDB line protocol string.", - "field set contains a field value of type `string`.", - "field set contains a `NaN` field value.", - ] - return: types: ["array"] - - examples: [ - { - title: "Parse InfluxDB line protocol" - source: #""" - parse_influxdb!("cpu,host=A,region=us-west usage_system=64i,usage_user=10u,temperature=50.5,on=true,sleep=false 1590488773254420000") - """# - return: [ - { - "name": "cpu_usage_system" - "tags": { - "host": "A" - "region": "us-west" - } - "timestamp": "2020-05-26T10:26:13.254420Z" - "kind": "absolute" - "gauge": { - "value": 64.0 - } - }, - { - "name": "cpu_usage_user" - "tags": { - "host": "A" - "region": "us-west" - } - "timestamp": "2020-05-26T10:26:13.254420Z" - "kind": "absolute" - "gauge": { - "value": 10.0 - } - }, - { - "name": "cpu_temperature" - "tags": { - "host": "A" - "region": "us-west" - } - "timestamp": "2020-05-26T10:26:13.254420Z" - "kind": "absolute" - "gauge": { 
- "value": 50.5 - } - }, - { - "name": "cpu_on" - "tags": { - "host": "A" - "region": "us-west" - } - "timestamp": "2020-05-26T10:26:13.254420Z" - "kind": "absolute" - "gauge": { - "value": 1.0 - } - }, - { - "name": "cpu_sleep" - "tags": { - "host": "A" - "region": "us-west" - } - "timestamp": "2020-05-26T10:26:13.254420Z" - "kind": "absolute" - "gauge": { - "value": 0.0 - } - }, - ] - }, - ] -} diff --git a/website/cue/reference/remap/functions/parse_int.cue b/website/cue/reference/remap/functions/parse_int.cue deleted file mode 100644 index 6c03784ba0117..0000000000000 --- a/website/cue/reference/remap/functions/parse_int.cue +++ /dev/null @@ -1,72 +0,0 @@ -package metadata - -remap: functions: parse_int: { - category: "Parse" - description: #""" - Parses the string `value` representing a number in an optional base/radix to an integer. - """# - - arguments: [ - { - name: "value" - description: "The string to parse." - required: true - type: ["string"] - }, - { - name: "base" - description: """ - The base the number is in. Must be between 2 and 36 (inclusive). - - If unspecified, the string prefix is used to - determine the base: "0b", 8 for "0" or "0o", 16 for "0x", - and 10 otherwise. 
- """ - required: false - type: ["integer"] - }, - ] - internal_failure_reasons: [ - "The base is not between 2 and 36.", - "The number cannot be parsed in the base.", - ] - return: types: ["integer"] - - examples: [ - { - title: "Parse decimal" - source: #""" - parse_int!("-42") - """# - return: -42 - }, - { - title: "Parse binary" - source: #""" - parse_int!("0b1001") - """# - return: 9 - }, - { - title: "Parse octal" - source: #""" - parse_int!("0o42") - """# - return: 34 - }, - { - title: "Parse hexadecimal" - source: #""" - parse_int!("0x2a") - """# - return: 42 - }, - { - title: "Parse explicit base" - source: #""" - parse_int!("2a", 17) - """# - return: 44 - }, - ] -} diff --git a/website/cue/reference/remap/functions/parse_json.cue b/website/cue/reference/remap/functions/parse_json.cue deleted file mode 100644 index bc7b561b93af3..0000000000000 --- a/website/cue/reference/remap/functions/parse_json.cue +++ /dev/null @@ -1,64 +0,0 @@ -package metadata - -remap: functions: parse_json: { - category: "Parse" - description: """ - Parses the `value` as JSON. - """ - notices: [ - """ - Only JSON types are returned. If you need to convert a `string` into a `timestamp`, consider the - [`parse_timestamp`](#parse_timestamp) function. - """, - ] - - arguments: [ - { - name: "value" - description: "The string representation of the JSON to parse." - required: true - type: ["string"] - }, - { - name: "max_depth" - description: """ - Number of layers to parse for nested JSON-formatted documents. - The value must be in the range of 1 to 128. - """ - required: false - type: ["integer"] - }, - { - name: "lossy" - description: """ - Whether to parse the JSON in a lossy manner. Replaces invalid UTF-8 characters - with the Unicode character `�` (U+FFFD) if set to true, otherwise returns an error - if there are any invalid UTF-8 characters present. 
- """ - required: false - default: true - type: ["boolean"] - }, - ] - internal_failure_reasons: [ - "`value` is not a valid JSON-formatted payload.", - ] - return: types: ["boolean", "integer", "float", "string", "object", "array", "null"] - - examples: [ - { - title: "Parse JSON" - source: #""" - parse_json!("{\"key\": \"val\"}") - """# - return: key: "val" - }, - { - title: "Parse JSON with max_depth" - source: #""" - parse_json!("{\"top_level\":{\"key\": \"val\"}}", max_depth: 1) - """# - return: top_level: "{\"key\": \"val\"}" - }, - ] -} diff --git a/website/cue/reference/remap/functions/parse_key_value.cue b/website/cue/reference/remap/functions/parse_key_value.cue deleted file mode 100644 index e2e28211eec3b..0000000000000 --- a/website/cue/reference/remap/functions/parse_key_value.cue +++ /dev/null @@ -1,134 +0,0 @@ -package metadata - -remap: functions: parse_key_value: { - category: "Parse" - description: """ - Parses the `value` in key-value format. Also known as [logfmt](\(urls.logfmt)). - - * Keys and values can be wrapped with `"`. - * `"` characters can be escaped using `\\`. - """ - notices: [ - """ - All values are returned as strings or as an array of strings for duplicate keys. We recommend manually coercing values to desired types as you see fit. - """, - ] - - arguments: [ - { - name: "value" - description: "The string to parse." - required: true - type: ["string"] - }, - { - name: "key_value_delimiter" - description: "The string that separates the key from the value." - required: false - default: "=" - type: ["string"] - }, - { - name: "field_delimiter" - description: "The string that separates each key-value pair." - required: false - default: " " - type: ["string"] - }, - { - name: "whitespace" - description: "Defines the acceptance of unnecessary whitespace surrounding the configured `key_value_delimiter`." - required: false - enum: { - lenient: "Ignore whitespace." - strict: "Parse whitespace as normal character." 
- } - default: "lenient" - type: ["string"] - }, - { - name: "accept_standalone_key" - description: "Whether a standalone key should be accepted, the resulting object associates such keys with the boolean value `true`." - required: false - type: ["boolean"] - default: true - }, - ] - internal_failure_reasons: [ - "`value` is not a properly formatted key-value string.", - ] - return: types: ["object"] - - examples: [ - { - title: "Parse logfmt log" - source: #""" - parse_key_value!( - "@timestamp=\"Sun Jan 10 16:47:39 EST 2021\" level=info msg=\"Stopping all fetchers\" tag#production=stopping_fetchers id=ConsumerFetcherManager-1382721708341 module=kafka.consumer.ConsumerFetcherManager" - ) - """# - return: { - "@timestamp": "Sun Jan 10 16:47:39 EST 2021" - level: "info" - msg: "Stopping all fetchers" - "tag#production": "stopping_fetchers" - id: "ConsumerFetcherManager-1382721708341" - module: "kafka.consumer.ConsumerFetcherManager" - } - }, - { - title: "Parse comma delimited log" - source: #""" - parse_key_value!( - "path:\"/cart_link\", host:store.app.com, fwd: \"102.30.171.16\", dyno: web.1, connect:0ms, service:87ms, status:304, bytes:632, protocol:https", - field_delimiter: ",", - key_value_delimiter: ":" - ) - """# - return: { - path: "/cart_link" - host: "store.app.com" - fwd: "102.30.171.16" - dyno: "web.1" - connect: "0ms" - service: "87ms" - status: "304" - bytes: "632" - protocol: "https" - } - }, - { - title: "Parse comma delimited log with standalone keys" - source: #""" - parse_key_value!( - "env:prod,service:backend,region:eu-east1,beta", - field_delimiter: ",", - key_value_delimiter: ":", - ) - """# - return: { - env: "prod" - service: "backend" - region: "eu-east1" - beta: true - } - }, - { - title: "Parse duplicate keys" - source: #""" - parse_key_value!( - "at=info,method=GET,path=\"/index\",status=200,tags=dev,tags=dummy", - field_delimiter: ",", - key_value_delimiter: "=", - ) - """# - return: { - at: "info" - method: "GET" - path: "/index" - 
status: "200" - tags: ["dev", "dummy"] - } - }, - ] -} diff --git a/website/cue/reference/remap/functions/parse_klog.cue b/website/cue/reference/remap/functions/parse_klog.cue deleted file mode 100644 index 7d6cfb0464e5a..0000000000000 --- a/website/cue/reference/remap/functions/parse_klog.cue +++ /dev/null @@ -1,41 +0,0 @@ -package metadata - -remap: functions: parse_klog: { - category: "Parse" - description: """ - Parses the `value` using the [klog](\(urls.klog)) format used by Kubernetes components. - """ - arguments: [ - { - name: "value" - description: "The string to parse." - required: true - type: ["string"] - }, - ] - internal_failure_reasons: [ - "`value` does not match the `klog` format.", - ] - return: types: ["object"] - notices: [ - """ - This function resolves the year for messages. If the current month is January and the provided month is December, it sets the year to the previous year. Otherwise, it sets the year to the current year. - """, - ] - examples: [ - { - title: "Parse using klog" - source: #""" - parse_klog!("I0505 17:59:40.692994 28133 klog.go:70] hello from klog") - """# - return: { - file: "klog.go" - id: 28133 - level: "info" - line: 70 - message: "hello from klog" - timestamp: "2026-05-05T17:59:40.692994Z" - } - }, - ] -} diff --git a/website/cue/reference/remap/functions/parse_linux_authorization.cue b/website/cue/reference/remap/functions/parse_linux_authorization.cue deleted file mode 100644 index 216898569da0d..0000000000000 --- a/website/cue/reference/remap/functions/parse_linux_authorization.cue +++ /dev/null @@ -1,46 +0,0 @@ -package metadata - -remap: functions: parse_linux_authorization: { - category: "Parse" - description: """ - Parses Linux authorization logs usually found under either `/var/log/auth.log` (for Debian-based systems) or - `/var/log/secure` (for RedHat-based systems) according to [Syslog](\(urls.syslog)) format. - """ - notices: [ - """ - The function resolves the year for messages that don't include it. 
If the current month is January, and the message is for - December, it will take the previous year. Otherwise, take the current year. - """, - ] - - arguments: [ - { - name: "value" - description: "The text containing the message to parse." - required: true - type: ["string"] - }, - ] - internal_failure_reasons: [ - "`value` is not a properly formatted Syslog message.", - ] - return: types: ["object"] - - examples: [ - { - title: "Parse Linux authorization event" - source: """ - parse_linux_authorization!( - s'Mar 23 01:49:58 localhost sshd[1111]: Accepted publickey for eng from 10.1.1.1 port 8888 ssh2: RSA SHA256:foobar' - ) - """ - return: { - appname: "sshd" - hostname: "localhost" - message: "Accepted publickey for eng from 10.1.1.1 port 8888 ssh2: RSA SHA256:foobar" - procid: 1111 - timestamp: "2026-03-23T01:49:58Z" - } - }, - ] -} diff --git a/website/cue/reference/remap/functions/parse_logfmt.cue b/website/cue/reference/remap/functions/parse_logfmt.cue deleted file mode 100644 index 74332c1cd7fc7..0000000000000 --- a/website/cue/reference/remap/functions/parse_logfmt.cue +++ /dev/null @@ -1,45 +0,0 @@ -package metadata - -remap: functions: parse_logfmt: { - category: "Parse" - description: """ - Parses the `value` in [logfmt](\(urls.logfmt)). - - * Keys and values can be wrapped using the `\"` character. - * `\"` characters can be escaped by the `\\` character. - * As per this [logfmt specification](\(urls.logfmt_specs)), the `parse_logfmt` function - accepts standalone keys and assigns them a Boolean value of `true`. - """ - - arguments: [ - { - name: "value" - description: "The string to parse." 
- required: true - type: ["string"] - }, - ] - internal_failure_reasons: [ - "`value` is not a properly formatted key-value string", - ] - return: types: ["object"] - - examples: [ - { - title: "Parse logfmt log" - source: #""" - parse_logfmt!( - "@timestamp=\"Sun Jan 10 16:47:39 EST 2021\" level=info msg=\"Stopping all fetchers\" tag#production=stopping_fetchers id=ConsumerFetcherManager-1382721708341 module=kafka.consumer.ConsumerFetcherManager" - ) - """# - return: { - "@timestamp": "Sun Jan 10 16:47:39 EST 2021" - level: "info" - msg: "Stopping all fetchers" - "tag#production": "stopping_fetchers" - id: "ConsumerFetcherManager-1382721708341" - module: "kafka.consumer.ConsumerFetcherManager" - } - }, - ] -} diff --git a/website/cue/reference/remap/functions/parse_nginx_log.cue b/website/cue/reference/remap/functions/parse_nginx_log.cue deleted file mode 100644 index 4e2ab088b7d82..0000000000000 --- a/website/cue/reference/remap/functions/parse_nginx_log.cue +++ /dev/null @@ -1,149 +0,0 @@ -package metadata - -remap: functions: parse_nginx_log: { - category: "Parse" - description: """ - Parses Nginx access and error log lines. Lines can be in [`combined`](\(urls.nginx_combined)), - [`ingress_upstreaminfo`](\(urls.nginx_ingress_upstreaminfo)), [`main`](\(urls.nginx_main)) or [`error`](\(urls.nginx_error)) format. - """ - notices: [ - """ - Missing information in the log message may be indicated by `-`. These fields are omitted in the result. - """, - """ - In case of `ingress_upstreaminfo` format the following fields may be safely omitted in the log message: `remote_addr`, `remote_user`, `http_referer`, `http_user_agent`, `proxy_alternative_upstream_name`, `upstream_addr`, `upstream_response_length`, `upstream_response_time`, `upstream_status`. - """, - ] - - arguments: [ - { - name: "value" - description: "The string to parse." 
- required: true - type: ["string"] - }, - { - name: "timestamp_format" - description: """ - - The [date/time format](\(urls.chrono_time_formats)) to use for encoding the timestamp. The time is parsed - in local time if the timestamp doesn't specify a timezone. The default format is `%d/%b/%Y:%T %z` for - combined logs and `%Y/%m/%d %H:%M:%S` for error logs. - """ - required: false - default: "%d/%b/%Y:%T %z" - type: ["string"] - }, - { - name: "format" - description: "The format to use for parsing the log." - required: true - enum: { - "combined": "Nginx combined format" - "error": "Default Nginx error format" - "ingress_upstreaminfo": "Provides detailed upstream information (Nginx Ingress Controller)" - "main": "Nginx main format used by Docker images" - } - type: ["string"] - }, - ] - - internal_failure_reasons: [ - "`value` does not match the specified format.", - "`timestamp_format` is not a valid format string.", - "The timestamp in `value` fails to parse using the provided `timestamp_format`.", - ] - return: types: ["object"] - - examples: [ - { - title: "Parse via Nginx log format (combined)" - source: #""" - parse_nginx_log!( - s'172.17.0.1 - alice [01/Apr/2021:12:02:31 +0000] "POST /not-found HTTP/1.1" 404 153 "http://localhost/somewhere" "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.119 Safari/537.36" "2.75"', - "combined", - ) - """# - return: { - agent: "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.119 Safari/537.36" - client: "172.17.0.1" - compression: "2.75" - referer: "http://localhost/somewhere" - request: "POST /not-found HTTP/1.1" - size: 153 - status: 404 - timestamp: "2021-04-01T12:02:31Z" - user: "alice" - } - }, - { - title: "Parse via Nginx log format (error)" - source: #""" - parse_nginx_log!( - s'2021/04/01 13:02:31 [error] 31#31: *1 open() "/usr/share/nginx/html/not-found" failed (2: No such file or directory), client: 172.17.0.1, server: localhost, request: 
"POST /not-found HTTP/1.1", host: "localhost:8081"', - "error" - ) - """# - return: { - timestamp: "2021-04-01T13:02:31Z" - severity: "error" - pid: 31 - tid: 31 - cid: 1 - message: "open() \"/usr/share/nginx/html/not-found\" failed (2: No such file or directory)" - client: "172.17.0.1" - server: "localhost" - request: "POST /not-found HTTP/1.1" - host: "localhost:8081" - } - }, - { - title: "Parse via Nginx log format (ingress_upstreaminfo)" - source: #""" - parse_nginx_log!( - s'0.0.0.0 - bob [18/Mar/2023:15:00:00 +0000] "GET /some/path HTTP/2.0" 200 12312 "https://10.0.0.1/some/referer" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36" 462 0.050 [some-upstream-service-9000] [some-other-upstream-5000] 10.0.50.80:9000 19437 0.049 200 752178adb17130b291aefd8c386279e7', - "ingress_upstreaminfo" - ) - """# - return: { - body_bytes_size: 12312 - http_referer: "https://10.0.0.1/some/referer" - http_user_agent: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36" - proxy_alternative_upstream_name: "some-other-upstream-5000" - proxy_upstream_name: "some-upstream-service-9000" - remote_addr: "0.0.0.0" - remote_user: "bob" - req_id: "752178adb17130b291aefd8c386279e7" - request: "GET /some/path HTTP/2.0" - request_length: 462 - request_time: 0.050 - status: 200 - timestamp: "2023-03-18T15:00:00Z" - upstream_addr: "10.0.50.80:9000" - upstream_response_length: 19437 - upstream_response_time: 0.049 - upstream_status: 200 - } - }, - { - title: "Parse via Nginx log format (main)" - source: #""" - parse_nginx_log!( - s'172.24.0.3 - alice [31/Dec/2024:17:32:06 +0000] "GET / HTTP/1.1" 200 615 "https://domain.tld/path" "curl/8.11.1" "1.2.3.4, 10.10.1.1"', - "main" - ) - """# - return: { - body_bytes_size: 615 - http_referer: "https://domain.tld/path" - http_user_agent: "curl/8.11.1" - http_x_forwarded_for: "1.2.3.4, 10.10.1.1" - remote_addr: 
"172.24.0.3" - remote_user: "alice" - request: "GET / HTTP/1.1" - status: 200 - timestamp: "2024-12-31T17:32:06Z" - } - }, - ] -} diff --git a/website/cue/reference/remap/functions/parse_proto.cue b/website/cue/reference/remap/functions/parse_proto.cue deleted file mode 100644 index 29557895e0df3..0000000000000 --- a/website/cue/reference/remap/functions/parse_proto.cue +++ /dev/null @@ -1,65 +0,0 @@ -package metadata - -remap: functions: parse_proto: { - category: "Parse" - description: """ - Parses the `value` as a protocol buffer payload. - """ - notices: [ - """ - Only proto messages are parsed and returned. - """, - ] - - arguments: [ - { - name: "value" - description: "The protocol buffer payload to parse." - required: true - type: ["string"] - }, - { - name: "desc_file" - description: """ - The path to the protobuf descriptor set file. Must be a literal string. - - This file is the output of protoc -o ... - """ - required: true - type: ["string"] - }, - { - name: "message_type" - description: """ - The name of the message type to use for serializing. - - Must be a literal string. 
- """ - required: true - type: ["string"] - }, - ] - internal_failure_reasons: [ - "`value` is not a valid proto payload.", - "`desc_file` file does not exist.", - "`message_type` message type does not exist in the descriptor file.", - ] - return: types: ["object"] - - examples: [ - { - title: "Parse proto" - source: #""" - parse_proto!(decode_base64!("Cgdzb21lb25lIggKBjEyMzQ1Ng=="), "resources/protobuf_descriptor_set.desc", "test_protobuf.Person") - """# - return: { - name: "someone" - phones: [ - { - number: "123456" - }, - ] - } - }, - ] -} diff --git a/website/cue/reference/remap/functions/parse_query_string.cue b/website/cue/reference/remap/functions/parse_query_string.cue deleted file mode 100644 index a5bc4308db839..0000000000000 --- a/website/cue/reference/remap/functions/parse_query_string.cue +++ /dev/null @@ -1,47 +0,0 @@ -package metadata - -remap: functions: parse_query_string: { - category: "Parse" - description: #""" - Parses the `value` as a query string. - """# - notices: [ - """ - All values are returned as strings. We recommend manually coercing values to desired types as you see fit. Empty keys and values are allowed. - """, - ] - - arguments: [ - { - name: "value" - description: "The string to parse." 
- required: true - type: ["string"] - }, - ] - internal_failure_reasons: [] - return: types: ["object"] - - examples: [ - { - title: "Parse query string" - source: #""" - parse_query_string("foo=%2B1&bar=2&bar=3&xyz") - """# - return: { - foo: "+1" - bar: ["2", "3"] - xyz: "" - } - }, - { - title: "Parse Ruby on Rails' query string" - source: #""" - parse_query_string("?foo%5b%5d=1&foo%5b%5d=2") - """# - return: { - "foo[]": ["1", "2"] - } - }, - ] -} diff --git a/website/cue/reference/remap/functions/parse_regex.cue b/website/cue/reference/remap/functions/parse_regex.cue deleted file mode 100644 index fc95669169749..0000000000000 --- a/website/cue/reference/remap/functions/parse_regex.cue +++ /dev/null @@ -1,78 +0,0 @@ -package metadata - -remap: functions: parse_regex: { - category: "Parse" - description: """ - Parses the `value` using the provided [Regex](\(urls.regex)) `pattern`. - - This function differs from the `parse_regex_all` function in that it returns only the first match. - """ - notices: [ - """ - VRL aims to provide purpose-specific [parsing functions](\(urls.vrl_parsing_functions)) for common log formats. - Before reaching for the `parse_regex` function, see if a VRL [`parse_*` function](\(urls.vrl_parsing_functions)) - already exists for your format. If not, we recommend [opening an issue](\(urls.new_feature_request)) to request - support for the desired format. - """, - """ - All values are returned as strings. We recommend manually coercing values to desired types as you see fit. - """, - ] - - arguments: [ - { - name: "value" - description: "The string to search." - required: true - type: ["string"] - }, - { - name: "pattern" - description: "The regular expression pattern to search against." - required: true - type: ["regex"] - }, - { - name: "numeric_groups" - description: """ - If true, the index of each group in the regular expression is also captured. Index `0` - contains the whole match. 
- """ - required: false - default: false - type: ["regex"] - }, - ] - internal_failure_reasons: [ - "`value` fails to parse using the provided `pattern`.", - ] - return: { - types: ["object"] - rules: [ - "Matches return all capture groups corresponding to the leftmost matches in the text.", - "Raises an error if no match is found.", - ] - } - - examples: [ - { - title: "Parse using Regex (with capture groups)" - source: """ - parse_regex!("first group and second group.", r'(?P.*?) group') - """ - return: { - number: "first" - } - }, - { - title: "Parse using Regex (without capture groups)" - source: """ - parse_regex!("first group and second group.", r'(\\w+) group', numeric_groups: true) - """ - return: { - "0": "first group" - "1": "first" - } - }, - ] -} diff --git a/website/cue/reference/remap/functions/parse_regex_all.cue b/website/cue/reference/remap/functions/parse_regex_all.cue deleted file mode 100644 index 0607a3fec176d..0000000000000 --- a/website/cue/reference/remap/functions/parse_regex_all.cue +++ /dev/null @@ -1,68 +0,0 @@ -package metadata - -remap: functions: parse_regex_all: { - category: "Parse" - description: """ - Parses the `value` using the provided [Regex](\(urls.regex)) `pattern`. - - This function differs from the `parse_regex` function in that it returns _all_ matches, not just the first. - """ - notices: remap.functions.parse_regex.notices - - arguments: [ - { - name: "value" - description: "The string to search." - required: true - type: ["string"] - }, - { - name: "pattern" - description: "The regular expression pattern to search against." - required: true - type: ["regex"] - }, - { - name: "numeric_groups" - description: """ - If `true`, the index of each group in the regular expression is also captured. Index `0` - contains the whole match. 
- """ - required: false - default: false - type: ["regex"] - }, - ] - internal_failure_reasons: [ - "`value` is not a string.", - "`pattern` is not a regex.", - ] - return: { - types: ["array"] - rules: [ - "Matches return all capture groups corresponding to the leftmost matches in the text.", - "Raises an error if no match is found.", - ] - } - - examples: [ - { - title: "Parse using Regex (all matches)" - source: """ - parse_regex_all!("first group and second group.", r'(?P\\w+) group', numeric_groups: true) - """ - return: [ - { - number: "first" - "0": "first group" - "1": "first" - }, - { - number: "second" - "0": "second group" - "1": "second" - }, - ] - }, - ] -} diff --git a/website/cue/reference/remap/functions/parse_ruby_hash.cue b/website/cue/reference/remap/functions/parse_ruby_hash.cue deleted file mode 100644 index 9d733c6ea2c5c..0000000000000 --- a/website/cue/reference/remap/functions/parse_ruby_hash.cue +++ /dev/null @@ -1,45 +0,0 @@ -package metadata - -remap: functions: parse_ruby_hash: { - category: "Parse" - description: """ - Parses the `value` as ruby hash. - """ - - notices: [ - """ - Only ruby types are returned. If you need to convert a `string` into a `timestamp`, consider the - [`parse_timestamp`](#parse_timestamp) function. - """, - ] - arguments: [ - { - name: "value" - description: "The string representation of the ruby hash to parse." 
- required: true - type: ["string"] - }, - ] - internal_failure_reasons: [ - "`value` is not a valid ruby hash formatted payload.", - ] - - return: types: ["object"] - - examples: [ - { - title: "Parse ruby hash" - source: """ - parse_ruby_hash!(s'{ "test" => "value", "testNum" => 0.2, "testObj" => { "testBool" => true, "testNull" => nil } }') - """ - return: { - test: "value" - testNum: 0.2 - testObj: { - testBool: true - testNull: null - } - } - }, - ] -} diff --git a/website/cue/reference/remap/functions/parse_syslog.cue b/website/cue/reference/remap/functions/parse_syslog.cue deleted file mode 100644 index cb17be2b04146..0000000000000 --- a/website/cue/reference/remap/functions/parse_syslog.cue +++ /dev/null @@ -1,58 +0,0 @@ -package metadata - -remap: functions: parse_syslog: { - category: "Parse" - description: """ - Parses the `value` in [Syslog](\(urls.syslog)) format. - """ - notices: [ - """ - The function makes a best effort to parse the various Syslog formats that exists out in the wild. This includes - [RFC 6587](\(urls.syslog_6587)), [RFC 5424](\(urls.syslog_5424)), [RFC 3164](\(urls.syslog_3164)), and other - common variations (such as the Nginx Syslog style). - """, - """ - All values are returned as strings. We recommend manually coercing values to desired types as you see fit. - """, - ] - - arguments: [ - { - name: "value" - description: "The text containing the Syslog message to parse." - required: true - type: ["string"] - }, - ] - internal_failure_reasons: [ - "`value` is not a properly formatted Syslog message.", - ] - return: types: ["object"] - - examples: [ - { - title: "Parse Syslog log (5424)" - source: """ - parse_syslog!( - s'<13>1 2020-03-13T20:45:38.119Z dynamicwireless.name non 2426 ID931 [exampleSDID@32473 iut="3" eventSource= "Application" eventID="1011"] Try to override the THX port, maybe it will reboot the neural interface!' 
- ) - """ - return: { - severity: "notice" - facility: "user" - timestamp: "2020-03-13T20:45:38.119Z" - hostname: "dynamicwireless.name" - appname: "non" - procid: 2426 - msgid: "ID931" - message: "Try to override the THX port, maybe it will reboot the neural interface!" - "exampleSDID@32473": { - eventID: "1011" - eventSource: "Application" - iut: "3" - } - version: 1 - } - }, - ] -} diff --git a/website/cue/reference/remap/functions/parse_timestamp.cue b/website/cue/reference/remap/functions/parse_timestamp.cue deleted file mode 100644 index a47aaa2aa6133..0000000000000 --- a/website/cue/reference/remap/functions/parse_timestamp.cue +++ /dev/null @@ -1,54 +0,0 @@ -package metadata - -remap: functions: parse_timestamp: { - category: "Parse" - description: """ - Parses the `value` in [strptime](\(urls.strptime_specifiers)) `format`. - """ - - arguments: [ - { - name: "value" - description: "The text of the timestamp." - required: true - type: ["string"] - }, - { - name: "format" - description: "The [strptime](\(urls.strptime_specifiers)) format." - required: true - type: ["string"] - }, - { - name: "timezone" - description: """ - The [TZ database](\(urls.tz_time_zones)) format. By default, this function parses the timestamp by global [`timezone` option](\(urls.vector_configuration)/global-options#timezone). - This argument overwrites the setting and is useful for parsing timestamps without a specified timezone, such as `16/10/2019 12:00:00`. 
- """ - required: false - type: ["string"] - }, - ] - internal_failure_reasons: [ - "`value` fails to parse using the provided `format`.", - "`value` fails to parse using the provided `timezone`.", - ] - return: types: ["timestamp"] - - examples: [ - { - title: "Parse timestamp" - source: #""" - parse_timestamp!("10-Oct-2020 16:00+00:00", format: "%v %R %:z") - """# - return: "2020-10-10T16:00:00Z" - }, - { - title: "Parse timestamp with timezone" - source: #""" - parse_timestamp!("16/10/2019 12:00:00", format: "%d/%m/%Y %H:%M:%S", timezone: "Asia/Taipei") - """# - return: "2019-10-16T04:00:00Z" - }, - ] -} diff --git a/website/cue/reference/remap/functions/parse_tokens.cue b/website/cue/reference/remap/functions/parse_tokens.cue deleted file mode 100644 index c76fb6e2f6ef2..0000000000000 --- a/website/cue/reference/remap/functions/parse_tokens.cue +++ /dev/null @@ -1,42 +0,0 @@ -package metadata - -remap: functions: parse_tokens: { - category: "Parse" - description: #""" - Parses the `value` in token format. A token is considered to be one of the following: - - * A word surrounded by whitespace. - * Text delimited by double quotes: `".."`. Quotes can be included in the token if they are escaped by a backslash (`\`). - * Text delimited by square brackets: `[..]`. Closing square brackets can be included in the token if they are escaped by a backslash (`\`). - """# - notices: [ - """ - All token values are returned as strings. We recommend manually coercing values to desired types as you see fit. - """, - ] - - arguments: [ - { - name: "value" - description: "The string to tokenize." 
- required: true - type: ["string"] - }, - ] - internal_failure_reasons: [ - "`value` is not a properly formatted tokenized string.", - ] - return: types: ["array"] - - examples: [ - { - title: "Parse tokens" - source: #""" - parse_tokens( - "A sentence \"with \\\"a\\\" sentence inside\" and [some brackets]" - ) - """# - return: ["A", "sentence", #"with \"a\" sentence inside"#, "and", "some brackets"] - }, - ] -} diff --git a/website/cue/reference/remap/functions/parse_url.cue b/website/cue/reference/remap/functions/parse_url.cue deleted file mode 100644 index 0d69d7842dde9..0000000000000 --- a/website/cue/reference/remap/functions/parse_url.cue +++ /dev/null @@ -1,100 +0,0 @@ -package metadata - -remap: functions: parse_url: { - category: "Parse" - description: """ - Parses the `value` in [URL](\(urls.url)) format. - """ - - arguments: [ - { - name: "value" - description: "The text of the URL." - required: true - type: ["string"] - }, - { - name: "default_known_ports" - description: """ - If true and the port number is not specified in the input URL - string (or matches the default port for the scheme), it is - populated from well-known ports for the following schemes: - `http`, `https`, `ws`, `wss`, and `ftp`. 
- """ - required: false - type: ["boolean"] - default: false - }, - ] - internal_failure_reasons: [ - "`value` is not a properly formatted URL.", - ] - return: types: ["object"] - - examples: [ - { - title: "Parse URL" - source: #""" - parse_url!("ftp://foo:bar@example.com:4343/foobar?hello=world#123") - """# - return: { - scheme: "ftp" - username: "foo" - password: "bar" - host: "example.com" - port: 4343 - path: "/foobar" - query: hello: "world" - fragment: "123" - } - }, - { - title: "Parse URL with default port" - source: #""" - parse_url!("https://example.com", default_known_ports: true) - """# - return: { - scheme: "https" - username: "" - password: "" - host: "example.com" - port: 443 - path: "/" - query: {} - fragment: null - } - }, - { - title: "Parse URL with internationalized domain name" - source: #""" - parse_url!("https://www.café.com") - """# - return: { - scheme: "https" - username: "" - password: "" - host: "www.xn--caf-dma.com" - port: null - path: "/" - query: {} - fragment: null - } - }, - { - title: "Parse URL with mixed case internationalized domain name" - source: #""" - parse_url!("https://www.CAFé.com") - """# - return: { - scheme: "https" - username: "" - password: "" - host: "www.xn--caf-dma.com" - port: null - path: "/" - query: {} - fragment: null - } - }, - ] -} diff --git a/website/cue/reference/remap/functions/parse_user_agent.cue b/website/cue/reference/remap/functions/parse_user_agent.cue deleted file mode 100644 index 3f0717ff5a292..0000000000000 --- a/website/cue/reference/remap/functions/parse_user_agent.cue +++ /dev/null @@ -1,123 +0,0 @@ -package metadata - -remap: functions: parse_user_agent: { - category: "Parse" - description: """ - Parses the `value` as a user agent string, which has [a loosely defined format](\(urls.user_agent)) - so this parser only provides best effort guarantee. - """ - notices: [ - "All values are returned as strings or as null. 
We recommend manually coercing values to desired types as you see fit.", - "Different modes return different schema.", - "Field which were not parsed out are set as `null`.", - ] - - arguments: [ - { - name: "value" - description: "The string to parse." - required: true - type: ["string"] - }, - { - name: "mode" - description: "Determines performance and reliability characteristics." - required: false - enum: { - fast: "Fastest mode but most unreliable. Uses parser from project [Woothee](\(urls.woothee))." - reliable: """ - Provides greater reliability than `fast` and retains it's speed in common cases. - Parses with [Woothee](\(urls.woothee)) parser and with parser from [uap project](\(urls.uap)) if - there are some missing fields that the first parser wasn't able to parse out - but the second one maybe can. - """ - enriched: """ - Parses with both parser from [Woothee](\(urls.woothee)) and parser from [uap project](\(urls.uap)) - and combines results. Result has the full schema. - """ - } - default: "fast" - type: ["string"] - }, - ] - internal_failure_reasons: [] - return: types: ["object"] - - examples: [ - { - title: "Fast mode" - source: #""" - parse_user_agent( - "Mozilla Firefox 1.0.1 Mozilla/5.0 (X11; U; Linux i686; de-DE; rv:1.7.6) Gecko/20050223 Firefox/1.0.1" - ) - """# - return: { - browser: { - family: "Firefox" - version: "1.0.1" - } - device: { - category: "pc" - } - os: { - family: "Linux" - version: null - } - } - }, - { - title: "Reliable mode" - source: #""" - parse_user_agent( - "Mozilla/4.0 (compatible; MSIE 7.66; Windows NT 5.1; SV1; .NET CLR 1.1.4322)", - mode: "reliable" - ) - """# - return: { - browser: { - family: "Internet Explorer" - version: "7.66" - } - device: { - category: "pc" - } - os: { - family: "Windows XP" - version: "NT 5.1" - } - } - }, - { - title: "Enriched mode" - source: #""" - parse_user_agent( - "Opera/9.80 (J2ME/MIDP; Opera Mini/4.3.24214; iPhone; CPU iPhone OS 4_2_1 like Mac OS X; AppleWebKit/24.783; U; en) 
Presto/2.5.25 Version/10.54", - mode: "enriched" - ) - """# - return: { - browser: { - family: "Opera Mini" - major: "4" - minor: "3" - patch: "24214" - version: "10.54" - } - device: { - brand: "Apple" - category: "smartphone" - family: "iPhone" - model: "iPhone" - } - os: { - family: "iOS" - major: "4" - minor: "2" - patch: "1" - patch_minor: null - version: "4.2.1" - } - } - }, - ] -} diff --git a/website/cue/reference/remap/functions/parse_xml.cue b/website/cue/reference/remap/functions/parse_xml.cue deleted file mode 100644 index ffe2ed4348e54..0000000000000 --- a/website/cue/reference/remap/functions/parse_xml.cue +++ /dev/null @@ -1,97 +0,0 @@ -package metadata - -remap: functions: parse_xml: { - category: "Parse" - description: """ - Parses the `value` as XML. - """ - notices: [ - """ - Valid XML must contain exactly one root node. Always returns an object. - """, - ] - - arguments: [ - { - name: "value" - description: "The string representation of the XML document to parse." - required: true - type: ["string"] - }, - { - name: "include_attr" - description: "Include XML tag attributes in the returned object." - required: false - default: true - type: ["boolean"] - }, - { - name: "attr_prefix" - description: "String prefix to use for XML tag attribute keys." - required: false - default: "@" - type: ["string"] - }, - { - name: "text_key" - description: "Key name to use for expanded text nodes." - required: false - default: "text" - type: ["string"] - }, - { - name: "always_use_text_key" - description: "Always return text nodes as `{\"\": \"value\"}.`" - required: false - default: false - type: ["boolean"] - }, - { - name: "parse_bool" - description: "Parse \"true\" and \"false\" as boolean." - required: false - default: true - type: ["boolean"] - }, - { - name: "parse_null" - description: "Parse \"null\" as null." - required: false - default: true - type: ["boolean"] - }, - { - name: "parse_number" - description: "Parse numbers as integers/floats." 
- required: false - default: true - type: ["boolean"] - }, - ] - internal_failure_reasons: [ - "`value` is not a valid XML document.", - ] - return: types: ["object"] - - examples: [ - { - title: "Parse XML" - source: #""" - value = s'Harry PotterJ K. Rowling2005'; - - parse_xml!(value, text_key: "value", parse_number: false) - """# - return: { - "book": { - "@category": "CHILDREN" - "author": "J K. Rowling" - "title": { - "@lang": "en" - "value": "Harry Potter" - } - "year": "2005" - } - } - }, - ] -} diff --git a/website/cue/reference/remap/functions/pascalcase.cue b/website/cue/reference/remap/functions/pascalcase.cue deleted file mode 100644 index cf0cc89dd7c4d..0000000000000 --- a/website/cue/reference/remap/functions/pascalcase.cue +++ /dev/null @@ -1,43 +0,0 @@ -package metadata - -remap: functions: pascalcase: { - category: "String" - description: """ - Takes the `value` string, and turns it into PascalCase. Optionally, you can - pass in the existing case of the function, or else we will try to figure out the case automatically. - """ - - arguments: [ - { - name: "value" - description: "The string to convert to PascalCase." - required: true - type: ["string"] - }, - { - name: "original_case" - description: "Optional hint on the original case type. 
Must be one of: kebab-case, camelCase, PascalCase, SCREAMING_SNAKE, snake_case" - required: false - type: ["string"] - }, - ] - internal_failure_reasons: [] - return: types: ["string"] - - examples: [ - { - title: "PascalCase a string" - source: #""" - pascalcase("input-string") - """# - return: "InputString" - }, - { - title: "PascalCase a string" - source: #""" - pascalcase("input-string", "kebab-case") - """# - return: "InputString" - }, - ] -} diff --git a/website/cue/reference/remap/functions/pop.cue b/website/cue/reference/remap/functions/pop.cue deleted file mode 100644 index 4f55b23b92539..0000000000000 --- a/website/cue/reference/remap/functions/pop.cue +++ /dev/null @@ -1,34 +0,0 @@ -package metadata - -remap: functions: pop: { - category: "Array" - description: """ - Removes the last item from the `value` array. - """ - - arguments: [ - { - name: "value" - description: "The target array." - required: true - type: ["array"] - }, - ] - internal_failure_reasons: [] - return: { - types: ["array"] - rules: [ - "The original `value` is not modified.", - ] - } - - examples: [ - { - title: "Pop an item from an array" - source: """ - pop([1, 2, 3]) - """ - return: [1, 2] - }, - ] -} diff --git a/website/cue/reference/remap/functions/push.cue b/website/cue/reference/remap/functions/push.cue deleted file mode 100644 index 3bde2dda7ddf6..0000000000000 --- a/website/cue/reference/remap/functions/push.cue +++ /dev/null @@ -1,40 +0,0 @@ -package metadata - -remap: functions: push: { - category: "Array" - description: """ - Adds the `item` to the end of the `value` array. - """ - - arguments: [ - { - name: "value" - description: "The target array." - required: true - type: ["array"] - }, - { - name: "item" - description: "The item to push." - required: true - type: ["any"] - }, - ] - internal_failure_reasons: [] - return: { - types: ["array"] - rules: [ - "Returns a new array. 
The `value` is _not_ modified in place.", - ] - } - - examples: [ - { - title: "Push an item onto an array" - source: """ - push([1, 2], 3) - """ - return: [1, 2, 3] - }, - ] -} diff --git a/website/cue/reference/remap/functions/random_bool.cue b/website/cue/reference/remap/functions/random_bool.cue deleted file mode 100644 index 9d1a784388eda..0000000000000 --- a/website/cue/reference/remap/functions/random_bool.cue +++ /dev/null @@ -1,22 +0,0 @@ -package metadata - -remap: functions: random_bool: { - category: "Random" - description: """ - Returns a random boolean. - """ - - arguments: [] - internal_failure_reasons: [] - return: types: ["boolean"] - - examples: [ - { - title: "Random boolean" - source: """ - is_boolean(random_bool()) - """ - return: true - }, - ] -} diff --git a/website/cue/reference/remap/functions/random_bytes.cue b/website/cue/reference/remap/functions/random_bytes.cue deleted file mode 100644 index d8e4c4339f6a7..0000000000000 --- a/website/cue/reference/remap/functions/random_bytes.cue +++ /dev/null @@ -1,33 +0,0 @@ -package metadata - -remap: functions: random_bytes: { - category: "Random" - description: """ - A cryptographically secure random number generator. Returns a string value containing the number of - random bytes requested. - """ - - arguments: [ - { - name: "length" - description: "The number of bytes to generate. Must not be larger than 64k." 
- required: true - type: ["integer"] - }, - ] - internal_failure_reasons: [ - "`length` is negative.", - "`length` is larger than the maximum value (64k).", - ] - return: types: ["string"] - - examples: [ - { - title: "Generate random base 64 encoded bytes" - source: #""" - encode_base64(random_bytes(16)) - """# - return: "LNu0BBgUbh7XAlXbjSOomQ==" - }, - ] -} diff --git a/website/cue/reference/remap/functions/random_float.cue b/website/cue/reference/remap/functions/random_float.cue deleted file mode 100644 index 442d5c0809568..0000000000000 --- a/website/cue/reference/remap/functions/random_float.cue +++ /dev/null @@ -1,38 +0,0 @@ -package metadata - -remap: functions: random_float: { - category: "Random" - description: """ - Returns a random float between [min, max). - """ - - arguments: [ - { - name: "min" - description: "Minimum value (inclusive)." - required: true - type: ["float"] - }, - { - name: "max" - description: "Maximum value (exclusive)." - required: true - type: ["float"] - }, - ] - internal_failure_reasons: [ - "`max` is not greater than `min`.", - ] - return: types: ["float"] - - examples: [ - { - title: "Random float from 0.0 to 10.0, not including 10.0" - source: """ - f = random_float(0.0, 10.0) - f >= 0 && f < 10 - """ - return: true - }, - ] -} diff --git a/website/cue/reference/remap/functions/random_int.cue b/website/cue/reference/remap/functions/random_int.cue deleted file mode 100644 index 42af5ba2a467f..0000000000000 --- a/website/cue/reference/remap/functions/random_int.cue +++ /dev/null @@ -1,38 +0,0 @@ -package metadata - -remap: functions: random_int: { - category: "Random" - description: """ - Returns a random integer between [min, max). - """ - - arguments: [ - { - name: "min" - description: "Minimum value (inclusive)." - required: true - type: ["integer"] - }, - { - name: "max" - description: "Maximum value (exclusive)." 
- required: true - type: ["integer"] - }, - ] - internal_failure_reasons: [ - "`max` is not greater than `min`.", - ] - return: types: ["integer"] - - examples: [ - { - title: "Random integer from 0 to 10, not including 10" - source: """ - i = random_int(0, 10) - i >= 0 && i < 10 - """ - return: true - }, - ] -} diff --git a/website/cue/reference/remap/functions/redact.cue b/website/cue/reference/remap/functions/redact.cue deleted file mode 100644 index 0e278d62d1dd0..0000000000000 --- a/website/cue/reference/remap/functions/redact.cue +++ /dev/null @@ -1,143 +0,0 @@ -package metadata - -remap: functions: redact: { - category: "String" - description: """ - Redact sensitive data in `value` such as: - - - [US social security card numbers](\(urls.us_social_security_number)) - - Other forms of personally identifiable information with custom patterns - - This can help achieve compliance by ensuring sensitive data does not leave your network. - """ - - arguments: [ - { - name: "value" - description: #""" - The value to redact sensitive data from. - - The function's behavior depends on `value`'s type: - - - For strings, the sensitive data is redacted and a new string is returned. - - For arrays, the sensitive data is redacted in each string element. - - For objects, the sensitive data in each string value is masked, but the keys are not masked. - - For arrays and objects, the function recurses into any nested arrays or objects. Any non-string elements are - skipped. - - Redacted text is replaced with `[REDACTED]`. - """# - required: true - type: ["string", "object", "array"] - }, - { - name: "filters" - description: #""" - List of filters applied to `value`. - - Each filter can be specified in the following ways: - - - As a regular expression, which is used to redact text that match it. - - As an object with a `type` key that corresponds to a named filter and additional keys for customizing that filter. - - As a named filter, if it has no required parameters. 
- - Named filters can be a: - - - `pattern`: Redacts text matching any regular expressions specified in the `patterns` - key, which is required. This is the expanded version of just passing a regular expression as a filter. - - `us_social_security_number`: Redacts US social security card numbers. - - See examples for more details. - - This parameter must be a static expression so that the argument can be validated at compile-time - to avoid runtime errors. You cannot use variables or other dynamic expressions with it. - """# - required: true - type: ["array"] - }, - { - name: "redactor" - description: """ - Specifies what to replace the redacted strings with. - - It is given as an object with a "type" key specifying the type of redactor to use - and additional keys depending on the type. The following types are supported: - - - `full`: The default. Replace with the string "[REDACTED]". - - `text`: Replace with a custom string. The `replacement` key is required, and must - contain the string that is used as a replacement. - - `sha2`: Hash the redacted text with SHA-2 as with [`sha2`](\(urls.sha2)). Supports two optional parameters: - - `variant`: The variant of the algorithm to use. Defaults to SHA-512/256. - - `encoding`: How to encode the hash as text. Can be base16 or base64. - Defaults to base64. - - `sha3`: Hash the redacted text with SHA-3 as with [`sha3`](\(urls.sha3)). Supports two optional parameters: - - `variant`: The variant of the algorithm to use. Defaults to SHA3-512. - - `encoding`: How to encode the hash as text. Can be base16 or base64. - Defaults to base64. 
- - - As a convenience you can use a string as a shorthand for common redactor patterns: - - - `"full"` is equivalent to `{"type": "full"}` - - `"sha2"` is equivalent to `{"type": "sha2", "variant": "SHA-512/256", "encoding": "base64"}` - - `"sha3"` is equivalent to `{"type": "sha3", "variant": "SHA3-512", "encoding": "base64"}` - - This parameter must be a static expression so that the argument can be validated at compile-time - to avoid runtime errors. You cannot use variables or other dynamic expressions with it. - """ - required: false - type: ["string", "object"] - }, - ] - internal_failure_reasons: [] - return: types: ["string", "object", "array"] - - examples: [ - { - title: "Replace text using a regex" - source: #""" - redact("my id is 123456", filters: [r'\d+']) - """# - return: "my id is [REDACTED]" - }, - { - title: "Replace us social security numbers in any field" - source: #""" - redact({ "name": "John Doe", "ssn": "123-12-1234"}, filters: ["us_social_security_number"]) - """# - return: { - name: "John Doe" - ssn: "[REDACTED]" - } - }, - { - title: "Replace with custom text" - source: #""" - redact("my id is 123456", filters: [r'\d+'], redactor: {"type": "text", "replacement": "***"}) - """# - return: "my id is ***" - }, - { - title: "Replace with SHA-2 hash" - source: #""" - redact("my id is 123456", filters: [r'\d+'], redactor: "sha2") - """# - return: "my id is GEtTedW1p6tC094dDKH+3B8P+xSnZz69AmpjaXRd63I=" - }, - { - title: "Replace with SHA-3 hash" - source: #""" - redact("my id is 123456", filters: [r'\d+'], redactor: "sha3") - """# - return: "my id is ZNCdmTDI7PeeUTFnpYjLdUObdizo+bIupZdl8yqnTKGdLx6X3JIqPUlUWUoFBikX+yTR+OcvLtAqWO11NPlNJw==" - }, - { - title: "Replace with SHA-256 hash using hex encoding" - source: #""" - redact("my id is 123456", filters: [r'\d+'], redactor: {"type": "sha2", "variant": "SHA-256", "encoding": "base16"}) - """# - return: "my id is 8d969eef6ecad3c29a3a629280e686cf0c3f5d5a86aff3ca12020c923adc6c92" - }, - ] -} diff 
--git a/website/cue/reference/remap/functions/remove.cue b/website/cue/reference/remap/functions/remove.cue deleted file mode 100644 index 4754035dfa5dc..0000000000000 --- a/website/cue/reference/remap/functions/remove.cue +++ /dev/null @@ -1,75 +0,0 @@ -package metadata - -remap: functions: remove: { - category: "Path" - description: """ - Dynamically remove the value for a given path. - - If you know the path you want to remove, use - the `del` function and static paths such as `del(.foo.bar[1])` - to remove the value at that path. The `del` function returns the - deleted value, and is more performant than `remove`. - However, if you do not know the path names, use the dynamic - `remove` function to remove the value at the provided path. - """ - - arguments: [ - { - name: "value" - description: "The object or array to remove data from." - required: true - type: ["object", "array"] - }, - { - name: "path" - description: "An array of path segments to remove the value from." - required: true - type: ["array"] - }, - { - name: "compact" - description: """ - After deletion, if `compact` is `true`, any empty objects or - arrays left are also removed. 
- """ - required: false - default: false - type: ["boolean"] - }, - ] - internal_failure_reasons: [ - #"The `path` segment must be a string or an integer."#, - ] - return: types: ["object", "array"] - - examples: [ - { - title: "single-segment top-level field" - source: #""" - remove!(value: { "foo": "bar" }, path: ["foo"]) - """# - return: {} - }, - { - title: "multi-segment nested field" - source: #""" - remove!(value: { "foo": { "bar": "baz" } }, path: ["foo", "bar"]) - """# - return: foo: {} - }, - { - title: "array indexing" - source: #""" - remove!(value: ["foo", "bar", "baz"], path: [-2]) - """# - return: ["foo", "baz"] - }, - { - title: "compaction" - source: #""" - remove!(value: { "foo": { "bar": [42], "baz": true } }, path: ["foo", "bar", 0], compact: true) - """# - return: foo: baz: true - }, - ] -} diff --git a/website/cue/reference/remap/functions/remove_secret.cue b/website/cue/reference/remap/functions/remove_secret.cue deleted file mode 100644 index bb3f3a198430e..0000000000000 --- a/website/cue/reference/remap/functions/remove_secret.cue +++ /dev/null @@ -1,31 +0,0 @@ -package metadata - -remap: functions: remove_secret: { - category: "Event" - description: """ - Removes a secret from an event. - """ - - arguments: [ - { - name: "key" - description: """ - The name of the secret to remove. 
- """ - required: true - type: ["string"] - }, - ] - internal_failure_reasons: [] - return: types: ["null"] - - examples: [ - { - title: "Removes the Datadog API key from the event" - source: #""" - remove_secret("datadog_api_key") - """# - return: null - }, - ] -} diff --git a/website/cue/reference/remap/functions/replace.cue b/website/cue/reference/remap/functions/replace.cue deleted file mode 100644 index d66218c6ea33e..0000000000000 --- a/website/cue/reference/remap/functions/replace.cue +++ /dev/null @@ -1,81 +0,0 @@ -package metadata - -remap: functions: replace: { - category: "String" - description: """ - Replaces all matching instances of `pattern` in `value`. - - The `pattern` argument accepts regular expression capture groups. - - **Note when using capture groups**: - - You will need to escape the `$` by using `$$` to avoid Vector interpreting it as an - [environment variable when loading configuration](/docs/reference/environment_variables/#escaping) - - If you want a literal `$` in the replacement pattern, you will also need to escape this - with `$$`. When combined with environment variable interpolation in config files this - means you will need to use `$$$$` to have a literal `$` in the replacement pattern. - """ - - arguments: [ - { - name: "value" - description: "The original string." - required: true - type: ["string"] - }, - { - name: "pattern" - description: "Replace all matches of this pattern. Can be a static string or a regular expression." - required: true - type: ["regex", "string"] - }, - { - name: "with" - description: "The string that the matches are replaced with." - required: true - type: ["string"] - }, - { - name: "count" - description: "The maximum number of replacements to perform. `-1` means replace all matches." 
- required: false - default: -1 - type: ["integer"] - - }, - ] - internal_failure_reasons: [] - return: types: ["string"] - - examples: [ - { - title: "Replace literal text" - source: #""" - replace("Apples and Bananas", "and", "not") - """# - return: "Apples not Bananas" - }, - { - title: "Replace using regular expression" - source: #""" - replace("Apples and Bananas", r'(?i)bananas', "Pineapples") - """# - return: "Apples and Pineapples" - }, - { - title: "Replace first instance" - source: #""" - replace("Bananas and Bananas", "Bananas", "Pineapples", count: 1) - """# - return: "Pineapples and Bananas" - }, - { - title: "Replace with capture groups when not set in the configuration file (use `$$num` in config files)" - source: #""" - # Note that in the context of Vector configuration files, an extra `$` escape character is required - # (i.e. `$$num`) to avoid interpreting `num` as an environment variable. - replace("foo123bar", r'foo(?P\d+)bar', "$num") - """# - return: "123" - }, - ] -} diff --git a/website/cue/reference/remap/functions/replace_with.cue b/website/cue/reference/remap/functions/replace_with.cue deleted file mode 100644 index 131f1e3e9d7f7..0000000000000 --- a/website/cue/reference/remap/functions/replace_with.cue +++ /dev/null @@ -1,85 +0,0 @@ -package metadata - -remap: functions: replace_with: { - category: "String" - description: """ - Replaces all matching instances of `pattern` using a closure. - - The `pattern` argument accepts a regular expression that can use capture groups. - - The function uses the function closure syntax to compute the replacement values. - - The closure takes a single parameter, which is an array, where the first item is always - present and contains the entire string that matched `pattern`. The items from index one on - contain the capture groups of the corresponding index. If a capture group is optional, the - value may be null if it didn't match. 
- - The value returned by the closure must be a string and will replace the section of - the input that was matched. - - This returns a new string with the replacements, the original string is not mutated. - """ - - arguments: [ - { - name: "value" - description: "The original string." - required: true - type: ["string"] - }, - { - name: "pattern" - description: "Replace all matches of this pattern. Must be a regular expression." - required: true - type: ["regex"] - }, - { - name: "count" - description: "The maximum number of replacements to perform. `-1` means replace all matches." - required: false - default: -1 - type: ["integer"] - }, - ] - internal_failure_reasons: [] - return: types: ["string"] - examples: [ - { - title: "Capitalize words" - source: #""" - replace_with("apples and bananas", r'\b(\w)(\w*)') -> |match| { - upcase!(match.captures[0]) + string!(match.captures[1]) - } - """# - return: "Apples And Bananas" - }, - { - title: "Replace with hash" - source: #""" - replace_with("email from test@example.com", r'\w+@example.com') -> |match| { - sha2(match.string, variant: "SHA-512/224") - } - """# - return: "email from adf6e1bc4415d24912bd93072ad34ef825a7b6eb3bf53f68def1fc17" - }, - { - title: "Replace first instance" - source: #""" - replace_with("Apples and Apples", r'(?i)apples|cones', count: 1) -> |match| { - "Pine" + downcase(match.string) - } - """# - return: "Pineapples and Apples" - }, - { - title: "Named capture group" - source: #""" - replace_with("level=error A message", r'level=(?P\w+)') -> |match| { - lvl = upcase!(match.level) - "[{{lvl}}]" - } - """# - return: "[ERROR] A message" - }, - ] -} diff --git a/website/cue/reference/remap/functions/round.cue b/website/cue/reference/remap/functions/round.cue deleted file mode 100644 index d44327a441af4..0000000000000 --- a/website/cue/reference/remap/functions/round.cue +++ /dev/null @@ -1,48 +0,0 @@ -package metadata - -remap: functions: round: { - category: "Number" - description: """ - Rounds 
the `value` to the specified `precision`. - """ - - arguments: [ - { - name: "value" - description: "The number to round." - required: true - type: ["integer", "float"] - }, - { - name: "precision" - description: "The number of decimal places to round to." - required: false - default: 0 - type: ["integer"] - }, - ] - internal_failure_reasons: [] - return: { - types: ["integer", "float"] - rules: [ - "If `precision` is `0`, then an integer is returned, otherwise a float is returned.", - ] - } - - examples: [ - { - title: "Round a number (without precision)" - source: #""" - round(4.345) - """# - return: 4.0 - }, - { - title: "Round a number (with precision)" - source: #""" - round(4.345, precision: 2) - """# - return: 4.35 - }, - ] -} diff --git a/website/cue/reference/remap/functions/screamingsnakecase.cue b/website/cue/reference/remap/functions/screamingsnakecase.cue deleted file mode 100644 index 37ccf0d97a6f1..0000000000000 --- a/website/cue/reference/remap/functions/screamingsnakecase.cue +++ /dev/null @@ -1,43 +0,0 @@ -package metadata - -remap: functions: screamingsnakecase: { - category: "String" - description: """ - Takes the `value` string, and turns it into SCREAMING_SNAKE case. Optionally, you can - pass in the existing case of the function, or else we will try to figure out the case automatically. - """ - - arguments: [ - { - name: "value" - description: "The string to convert to SCREAMING_SNAKE case." - required: true - type: ["string"] - }, - { - name: "original_case" - description: "Optional hint on the original case type. 
Must be one of: kebab-case, camelCase, PascalCase, SCREAMING_SNAKE, snake_case" - required: false - type: ["string"] - }, - ] - internal_failure_reasons: [] - return: types: ["string"] - - examples: [ - { - title: "SCREAMING_SNAKE a string" - source: #""" - screamingsnakecase("input-string") - """# - return: "INPUT_STRING" - }, - { - title: "SCREAMING_SNAKE a string" - source: #""" - screamingsnakecase("input-string", "kebab-case") - """# - return: "INPUT_STRING" - }, - ] -} diff --git a/website/cue/reference/remap/functions/seahash.cue b/website/cue/reference/remap/functions/seahash.cue deleted file mode 100644 index d4367c2e7a827..0000000000000 --- a/website/cue/reference/remap/functions/seahash.cue +++ /dev/null @@ -1,37 +0,0 @@ -package metadata - -remap: functions: seahash: { - category: "Cryptography" - description: """ - Calculates a [Seahash](\(urls.seahash)) hash of the `value`. - **Note**: Due to limitations in the underlying VRL data types, this function converts the unsigned 64-bit integer SeaHash result to a signed 64-bit integer. Results higher than the signed 64-bit integer maximum value wrap around to negative values. - """ - - arguments: [ - { - name: "value" - description: "The string to calculate the hash for." 
- required: true - type: ["string"] - }, - ] - internal_failure_reasons: [] - return: types: ["integer"] - - examples: [ - { - title: "Calculate seahash" - source: #""" - seahash("foobar") - """# - return: 5348458858952426560 - }, - { - title: "Calculate negative seahash" - source: #""" - seahash("bar") - """# - return: -2796170501982571315 - }, - ] -} diff --git a/website/cue/reference/remap/functions/set.cue b/website/cue/reference/remap/functions/set.cue deleted file mode 100644 index 6524588dc6388..0000000000000 --- a/website/cue/reference/remap/functions/set.cue +++ /dev/null @@ -1,63 +0,0 @@ -package metadata - -remap: functions: set: { - category: "Path" - description: """ - Dynamically insert data into the path of a given object or array. - - If you know the path you want to assign a value to, - use static path assignments such as `.foo.bar[1] = true` for - improved performance and readability. However, if you do not - know the path names, use the dynamic `set` function to - insert the data into the object or array. - """ - - arguments: [ - { - name: "value" - description: "The object or array to insert data into." - required: true - type: ["object", "array"] - }, - { - name: "path" - description: "An array of path segments to insert the value into." - required: true - type: ["array"] - }, - { - name: "data" - description: "The data to be inserted." 
- required: true - type: ["any"] - }, - ] - internal_failure_reasons: [ - #"The `path` segment must be a string or an integer."#, - ] - return: types: ["object", "array"] - - examples: [ - { - title: "single-segment top-level field" - source: #""" - set!(value: { "foo": "bar" }, path: ["foo"], data: "baz") - """# - return: foo: "baz" - }, - { - title: "multi-segment nested field" - source: #""" - set!(value: { "foo": { "bar": "baz" } }, path: ["foo", "bar"], data: "qux") - """# - return: foo: bar: "qux" - }, - { - title: "array" - source: #""" - set!(value: ["foo", "bar", "baz"], path: [-2], data: 42) - """# - return: ["foo", 42, "baz"] - }, - ] -} diff --git a/website/cue/reference/remap/functions/set_secret.cue b/website/cue/reference/remap/functions/set_secret.cue deleted file mode 100644 index 7b54d22cb2f3a..0000000000000 --- a/website/cue/reference/remap/functions/set_secret.cue +++ /dev/null @@ -1,35 +0,0 @@ -package metadata - -remap: functions: set_secret: { - category: "Event" - description: """ - Sets the given secret in the event. - """ - - arguments: [ - { - name: "key" - description: "The name of the secret." - required: true - type: ["string"] - }, - { - name: "secret" - description: "The secret value." - required: true - type: ["string"] - }, - ] - internal_failure_reasons: [] - return: types: ["null"] - - examples: [ - { - title: "Set the Datadog API key to the given value" - source: #""" - set_secret("datadog_api_key", "abc122") - """# - return: null - }, - ] -} diff --git a/website/cue/reference/remap/functions/set_semantic_meaning.cue b/website/cue/reference/remap/functions/set_semantic_meaning.cue deleted file mode 100644 index d2431a3c7a638..0000000000000 --- a/website/cue/reference/remap/functions/set_semantic_meaning.cue +++ /dev/null @@ -1,43 +0,0 @@ -package metadata - -remap: functions: set_semantic_meaning: { - category: "Event" - description: """ - Sets a semantic meaning for an event. 
**Note**: This function assigns - meaning at startup, and has _no_ runtime behavior. It is suggested - to put all calls to this function at the beginning of a VRL function. The function - cannot be conditionally called. For example, using an if statement cannot stop the meaning - from being assigned. - """ - - arguments: [ - { - name: "target" - description: """ - The path of the value that is assigned a meaning. - """ - required: true - type: ["path"] - }, - { - name: "meaning" - description: """ - The name of the meaning to assign. - """ - required: true - type: ["string"] - }, - ] - internal_failure_reasons: [] - return: types: ["null"] - - examples: [ - { - title: "Sets custom field semantic meaning" - source: #""" - set_semantic_meaning(.foo, "bar") - """# - return: null - }, - ] -} diff --git a/website/cue/reference/remap/functions/sha1.cue b/website/cue/reference/remap/functions/sha1.cue deleted file mode 100644 index fc63040192693..0000000000000 --- a/website/cue/reference/remap/functions/sha1.cue +++ /dev/null @@ -1,29 +0,0 @@ -package metadata - -remap: functions: sha1: { - category: "Cryptography" - description: """ - Calculates a [SHA-1](\(urls.sha1)) hash of the `value`. - """ - - arguments: [ - { - name: "value" - description: "The string to calculate the hash for." - required: true - type: ["string"] - }, - ] - internal_failure_reasons: [] - return: types: ["string"] - - examples: [ - { - title: "Calculate sha1 hash" - source: #""" - sha1("foo") - """# - return: "0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33" - }, - ] -} diff --git a/website/cue/reference/remap/functions/sha2.cue b/website/cue/reference/remap/functions/sha2.cue deleted file mode 100644 index 0bf171b0b6c57..0000000000000 --- a/website/cue/reference/remap/functions/sha2.cue +++ /dev/null @@ -1,44 +0,0 @@ -package metadata - -remap: functions: sha2: { - category: "Cryptography" - description: """ - Calculates a [SHA-2](\(urls.sha2)) hash of the `value`. 
- """ - - arguments: [ - { - name: "value" - description: "The string to calculate the hash for." - required: true - type: ["string"] - }, - { - name: "variant" - description: "The variant of the algorithm to use." - enum: { - "SHA-224": "SHA-224 algorithm" - "SHA-256": "SHA-256 algorithm" - "SHA-384": "SHA-384 algorithm" - "SHA-512": "SHA-512 algorithm" - "SHA-512/224": "SHA-512/224 algorithm" - "SHA-512/256": "SHA-512/256 algorithm" - } - required: false - default: "SHA-512/256" - type: ["string"] - }, - ] - internal_failure_reasons: [] - return: types: ["string"] - - examples: [ - { - title: "Calculate sha2 hash" - source: #""" - sha2("foo", variant: "SHA-512/224") - """# - return: "d68f258d37d670cfc1ec1001a0394784233f88f056994f9a7e5e99be" - }, - ] -} diff --git a/website/cue/reference/remap/functions/sha3.cue b/website/cue/reference/remap/functions/sha3.cue deleted file mode 100644 index f43ad144e5537..0000000000000 --- a/website/cue/reference/remap/functions/sha3.cue +++ /dev/null @@ -1,42 +0,0 @@ -package metadata - -remap: functions: sha3: { - category: "Cryptography" - description: """ - Calculates a [SHA-3](\(urls.sha3)) hash of the `value`. - """ - - arguments: [ - { - name: "value" - description: "The string to calculate the hash for." - required: true - type: ["string"] - }, - { - name: "variant" - description: "The variant of the algorithm to use." 
- enum: { - "SHA3-224": "SHA3-224 algorithm" - "SHA3-256": "SHA3-256 algorithm" - "SHA3-384": "SHA3-384 algorithm" - "SHA3-512": "SHA3-512 algorithm" - } - required: false - default: "SHA3-512" - type: ["string"] - }, - ] - internal_failure_reasons: [] - return: types: ["string"] - - examples: [ - { - title: "Calculate sha3 hash" - source: #""" - sha3("foo", variant: "SHA3-224") - """# - return: "f4f6779e153c391bbd29c95e72b0708e39d9166c7cea51d1f10ef58a" - }, - ] -} diff --git a/website/cue/reference/remap/functions/shannon_entropy.cue b/website/cue/reference/remap/functions/shannon_entropy.cue deleted file mode 100644 index 9f6c904403b3b..0000000000000 --- a/website/cue/reference/remap/functions/shannon_entropy.cue +++ /dev/null @@ -1,63 +0,0 @@ -package metadata - -remap: functions: shannon_entropy: { - category: "String" - description: """ - Generates [Shannon entropy](\(urls.shannon_entropy)) from given string. It can generate it - based on string bytes, codepoints, or graphemes. - """ - - arguments: [ - { - name: "value" - description: "The input string." - required: true - type: ["string"] - }, - { - name: "segmentation" - description: """ - Defines how to split the string to calculate entropy, based on occurrences of - segments. - - Byte segmentation is the fastest, but it might give undesired results when handling - UTF-8 strings, while grapheme segmentation is the slowest, but most correct in these - cases. 
- """ - required: false - type: ["string"] - default: "byte" - enum: { - byte: "Considers individual bytes when calculating entropy" - codepoint: "Considers codepoints when calculating entropy" - grapheme: "Considers graphemes when calculating entropy" - } - }, - ] - internal_failure_reasons: [] - return: types: ["float"] - - examples: [ - { - title: "Simple byte segmentation example" - source: #""" - floor(shannon_entropy("vector.dev"), precision: 4) - """# - return: 2.9219 - }, - { - title: "UTF-8 string with bytes segmentation" - source: #""" - floor(shannon_entropy("test123%456.فوائد.net."), precision: 4) - """# - return: 4.0784 - }, - { - title: "UTF-8 string with grapheme segmentation" - source: #""" - floor(shannon_entropy("test123%456.فوائد.net.", segmentation: "grapheme"), precision: 4) - """# - return: 3.9362 - }, - ] -} diff --git a/website/cue/reference/remap/functions/sieve.cue b/website/cue/reference/remap/functions/sieve.cue deleted file mode 100644 index 0388bb7f72c60..0000000000000 --- a/website/cue/reference/remap/functions/sieve.cue +++ /dev/null @@ -1,65 +0,0 @@ -package metadata - -remap: functions: sieve: { - category: "String" - description: """ - Keeps only matches of `pattern` in `value`. - - This can be used to define patterns that are allowed in the string and - remove everything else. - """ - - arguments: [ - { - name: "value" - description: "The original string." - required: true - type: ["string"] - }, - { - name: "pattern" - description: """ - Keep all matches of this pattern. - """ - required: true - type: ["regex"] - }, - { - name: "replace_single" - description: """ - The string to use to replace single rejected characters. - """ - required: false - default: "" - type: ["string"] - }, - { - name: "replace_repeated" - description: """ - The string to use to replace multiple sequential instances of rejected characters. 
- """ - required: false - default: "" - type: ["string"] - }, - ] - internal_failure_reasons: [] - return: types: ["string"] - - examples: [ - { - title: "Sieve with regex" - source: #""" - sieve("test123%456.فوائد.net.", r'[a-z0-9.]') - """# - return: "test123456..net." - }, - { - title: "Custom replacements" - source: #""" - sieve("test123%456.فوائد.net.", r'[a-z.0-9]', replace_single: "X", replace_repeated: "") - """# - return: "test123X456..net." - }, - ] -} diff --git a/website/cue/reference/remap/functions/slice.cue b/website/cue/reference/remap/functions/slice.cue deleted file mode 100644 index 8e4bf4afc2196..0000000000000 --- a/website/cue/reference/remap/functions/slice.cue +++ /dev/null @@ -1,53 +0,0 @@ -package metadata - -remap: functions: slice: { - category: "String" - description: """ - Returns a slice of `value` between the `start` and `end` positions. - - If the `start` and `end` parameters are negative, they refer to positions counting from the right of the - string or array. If `end` refers to a position that is greater than the length of the string or array, - a slice up to the end of the string or array is returned. - """ - - arguments: [ - { - name: "value" - description: "The string or array to slice." - required: true - type: ["array", "string"] - }, - { - name: "start" - description: "The inclusive start position. A zero-based index that can be negative." - required: true - type: ["integer"] - }, - { - name: "end" - description: "The exclusive end position. A zero-based index that can be negative." 
- required: false - default: "String length" - type: ["integer"] - }, - ] - internal_failure_reasons: [] - return: types: ["array", "string"] - - examples: [ - { - title: "Slice a string (positive index)" - source: #""" - slice!("Supercalifragilisticexpialidocious", start: 5, end: 13) - """# - return: "califrag" - }, - { - title: "Slice a string (negative index)" - source: #""" - slice!("Supercalifragilisticexpialidocious", start: 5, end: -14) - """# - return: "califragilistic" - }, - ] -} diff --git a/website/cue/reference/remap/functions/snakecase.cue b/website/cue/reference/remap/functions/snakecase.cue deleted file mode 100644 index dab198c67feef..0000000000000 --- a/website/cue/reference/remap/functions/snakecase.cue +++ /dev/null @@ -1,43 +0,0 @@ -package metadata - -remap: functions: snakecase: { - category: "String" - description: """ - Takes the `value` string, and turns it into snake-case. Optionally, you can - pass in the existing case of the function, or else we will try to figure out the case automatically. - """ - - arguments: [ - { - name: "value" - description: "The string to convert to snake-case." - required: true - type: ["string"] - }, - { - name: "original_case" - description: "Optional hint on the original case type. 
Must be one of: kebab-case, camelCase, PascalCase, SCREAMING_SNAKE, snake_case" - required: false - type: ["string"] - }, - ] - internal_failure_reasons: [] - return: types: ["string"] - - examples: [ - { - title: "snake-case a string" - source: #""" - snakecase("input-string") - """# - return: "input_string" - }, - { - title: "snake-case a string" - source: #""" - snakecase("input-string", "kebab-case") - """# - return: "input_string" - }, - ] -} diff --git a/website/cue/reference/remap/functions/split.cue b/website/cue/reference/remap/functions/split.cue deleted file mode 100644 index 2775252423f1a..0000000000000 --- a/website/cue/reference/remap/functions/split.cue +++ /dev/null @@ -1,53 +0,0 @@ -package metadata - -remap: functions: split: { - category: "String" - description: """ - Splits the `value` string using `pattern`. - """ - - arguments: [ - { - name: "value" - description: "The string to split." - required: true - type: ["string"] - }, - { - name: "pattern" - description: "The string is split whenever this pattern is matched." - required: true - type: ["string", "regex"] - }, - { - name: "limit" - description: "The maximum number of substrings to return." 
- required: false - type: ["integer"] - }, - ] - internal_failure_reasons: [] - return: { - types: ["array"] - rules: [ - "If `limit` is specified, the remainder of the string is returned unsplit after `limit` has been reached.", - ] - } - - examples: [ - { - title: "Split a string (no limit)" - source: #""" - split("apples and pears and bananas", " and ") - """# - return: ["apples", "pears", "bananas"] - }, - { - title: "Split a string (with a limit)" - source: #""" - split("apples and pears and bananas", " and ", limit: 2) - """# - return: ["apples", "pears and bananas"] - }, - ] -} diff --git a/website/cue/reference/remap/functions/split_path.cue b/website/cue/reference/remap/functions/split_path.cue deleted file mode 100644 index 981f64bfb6522..0000000000000 --- a/website/cue/reference/remap/functions/split_path.cue +++ /dev/null @@ -1,53 +0,0 @@ -package metadata - -remap: functions: split_path: { - category: "String" - description: """ - Splits the given `path` into its constituent components, returning an array of strings. - Each component represents a part of the file system path hierarchy. - """ - - arguments: [ - { - name: "value" - description: "The path to split into components." 
- required: true - type: ["string"] - }, - ] - internal_failure_reasons: [ - "`value` is not a valid string.", - ] - return: types: ["array"] - - examples: [ - { - title: "Split path with trailing slash" - source: """ - split_path("/home/user/") - """ - return: ["/", "home", "user"] - }, - { - title: "Split path from file path" - source: """ - split_path("/home/user") - """ - return: ["/", "home", "user"] - }, - { - title: "Split path from root" - source: """ - split_path("/") - """ - return: ["/"] - }, - { - title: "Empty path returns empty array" - source: """ - split_path("") - """ - return: [] - }, - ] -} diff --git a/website/cue/reference/remap/functions/starts_with.cue b/website/cue/reference/remap/functions/starts_with.cue deleted file mode 100644 index 31496583701a3..0000000000000 --- a/website/cue/reference/remap/functions/starts_with.cue +++ /dev/null @@ -1,49 +0,0 @@ -package metadata - -remap: functions: starts_with: { - category: "String" - description: """ - Determines whether `value` begins with `substring`. - """ - - arguments: [ - { - name: "value" - description: "The string to search." - required: true - type: ["string"] - }, - { - name: "substring" - description: "The substring that the `value` must start with." - required: true - type: ["string"] - }, - { - name: "case_sensitive" - description: "Whether the match should be case sensitive." 
- required: false - type: ["boolean"] - default: true - }, - ] - internal_failure_reasons: [] - return: types: ["boolean"] - - examples: [ - { - title: "String starts with (case sensitive)" - source: #""" - starts_with("The Needle In The Haystack", "The Needle") - """# - return: true - }, - { - title: "String starts with (case insensitive)" - source: #""" - starts_with("The Needle In The Haystack", "the needle", case_sensitive: false) - """# - return: true - }, - ] -} diff --git a/website/cue/reference/remap/functions/string.cue b/website/cue/reference/remap/functions/string.cue deleted file mode 100644 index 16ff4af9e39cb..0000000000000 --- a/website/cue/reference/remap/functions/string.cue +++ /dev/null @@ -1,38 +0,0 @@ -package metadata - -remap: functions: string: { - category: "Type" - description: """ - Returns `value` if it is a string, otherwise returns an error. This enables the type checker to guarantee that the - returned value is a string and can be used in any function that expects a string. - """ - - arguments: [ - { - name: "value" - description: "The value to check if it is a string." 
- required: true - type: ["any"] - }, - ] - internal_failure_reasons: [ - "`value` is not a string.", - ] - return: { - types: ["string"] - rules: [ - #"Returns the `value` if it's a string."#, - #"Raises an error if not a string."#, - ] - } - examples: [ - { - title: "Declare a string type" - input: log: message: #"{"field": "value"}"# - source: #""" - string!(.message) - """# - return: input.log.message - }, - ] -} diff --git a/website/cue/reference/remap/functions/strip_ansi_escape_codes.cue b/website/cue/reference/remap/functions/strip_ansi_escape_codes.cue deleted file mode 100644 index 2ec9717b1a5fe..0000000000000 --- a/website/cue/reference/remap/functions/strip_ansi_escape_codes.cue +++ /dev/null @@ -1,29 +0,0 @@ -package metadata - -remap: functions: strip_ansi_escape_codes: { - category: "String" - description: """ - Strips [ANSI escape codes](\(urls.ansi_escape_codes)) from `value`. - """ - - arguments: [ - { - name: "value" - description: "The string to strip." - required: true - type: ["string"] - }, - ] - internal_failure_reasons: [] - return: types: ["string"] - - examples: [ - { - title: "Strip ANSI escape codes" - source: #""" - strip_ansi_escape_codes("\e[46mfoo\e[0m bar") - """# - return: "foo bar" - }, - ] -} diff --git a/website/cue/reference/remap/functions/strip_whitespace.cue b/website/cue/reference/remap/functions/strip_whitespace.cue deleted file mode 100644 index bef2bec05b9e9..0000000000000 --- a/website/cue/reference/remap/functions/strip_whitespace.cue +++ /dev/null @@ -1,30 +0,0 @@ -package metadata - -remap: functions: strip_whitespace: { - category: "String" - description: """ - Strips whitespace from the start and end of `value`, where whitespace is defined by the [Unicode - `White_Space` property](\(urls.unicode_whitespace)). - """ - - arguments: [ - { - name: "value" - description: "The string to trim." 
- required: true - type: ["string"] - }, - ] - internal_failure_reasons: [] - return: types: ["string"] - - examples: [ - { - title: "Strip whitespace" - source: #""" - strip_whitespace(" A sentence. ") - """# - return: "A sentence." - }, - ] -} diff --git a/website/cue/reference/remap/functions/strlen.cue b/website/cue/reference/remap/functions/strlen.cue deleted file mode 100644 index 5aab9bb20a188..0000000000000 --- a/website/cue/reference/remap/functions/strlen.cue +++ /dev/null @@ -1,35 +0,0 @@ -package metadata - -remap: functions: strlen: { - category: "Enumerate" - description: """ - Returns the number of UTF-8 characters in `value`. This differs from - `length` which counts the number of bytes of a string. - - **Note**: This is the count of [Unicode scalar values](https://www.unicode.org/glossary/#unicode_scalar_value) - which can sometimes differ from [Unicode code points](https://www.unicode.org/glossary/#code_point). - """ - - arguments: [ - { - name: "value" - description: "The string." - required: true - type: ["string"] - }, - ] - internal_failure_reasons: [] - return: { - types: ["integer"] - } - - examples: [ - { - title: "strlen" - source: """ - strlen("ñandú") - """ - return: 5 - }, - ] -} diff --git a/website/cue/reference/remap/functions/tag_types_externally.cue b/website/cue/reference/remap/functions/tag_types_externally.cue deleted file mode 100644 index 99e1e290bbb78..0000000000000 --- a/website/cue/reference/remap/functions/tag_types_externally.cue +++ /dev/null @@ -1,74 +0,0 @@ -package metadata - -remap: functions: tag_types_externally: { - category: "Type" - description: """ - Adds type information to all (nested) scalar values in the provided `value`. - - The type information is added externally, meaning that `value` has the form of `"type": value` after this - transformation. - """ - arguments: [ - { - name: "value" - description: "The value to tag with types." 
- required: true - type: ["any"] - }, - ] - internal_failure_reasons: [] - return: types: ["object", "array", "null"] - examples: [ - { - title: "Tag types externally (scalar)" - source: #""" - tag_types_externally(123) - """# - return: { - integer: 123 - } - }, - { - title: "Tag types externally (object)" - source: #""" - tag_types_externally({ - "message": "Hello world", - "request": { - "duration_ms": 67.9 - } - }) - """# - return: { - message: { - string: "Hello world" - } - request: { - duration_ms: { - float: 67.9 - } - } - } - }, - { - title: "Tag types externally (array)" - source: #""" - tag_types_externally(["foo", "bar"]) - """# - return: [ - { - string: "foo" - }, - { - string: "bar" - }, - ] - }, - { - title: "Tag types externally (null)" - source: #""" - tag_types_externally(null) - """# - return: null - }, - ] -} diff --git a/website/cue/reference/remap/functions/timestamp.cue b/website/cue/reference/remap/functions/timestamp.cue deleted file mode 100644 index 37da9bd4a2c78..0000000000000 --- a/website/cue/reference/remap/functions/timestamp.cue +++ /dev/null @@ -1,38 +0,0 @@ -package metadata - -remap: functions: timestamp: { - category: "Type" - description: """ - Returns `value` if it is a timestamp, otherwise returns an error. This enables the type checker to guarantee that - the returned value is a timestamp and can be used in any function that expects a timestamp. - """ - - arguments: [ - { - name: "value" - description: "The value to check if it is a timestamp." 
- required: true - type: ["any"] - }, - ] - internal_failure_reasons: [ - "`value` is not a timestamp.", - ] - return: { - types: ["timestamp"] - rules: [ - #"Returns the `value` if it's a timestamp."#, - #"Raises an error if not a timestamp."#, - ] - } - examples: [ - { - title: "Declare a timestamp type" - input: log: timestamp: "2020-10-10T16:00:00Z" - source: #""" - timestamp(t'2020-10-10T16:00:00Z') - """# - return: "2020-10-10T16:00:00Z" - }, - ] -} diff --git a/website/cue/reference/remap/functions/to_bool.cue b/website/cue/reference/remap/functions/to_bool.cue deleted file mode 100644 index 3ba6a9d45d898..0000000000000 --- a/website/cue/reference/remap/functions/to_bool.cue +++ /dev/null @@ -1,69 +0,0 @@ -package metadata - -remap: functions: to_bool: { - category: "Coerce" - description: """ - Coerces the `value` into a boolean. - """ - - arguments: [ - { - name: "value" - description: "The value to convert to a Boolean." - required: true - type: ["boolean", "integer", "float", "null", "string"] - }, - ] - internal_failure_reasons: [ - "`value` is not a supported boolean representation.", - ] - return: { - types: ["boolean"] - rules: [ - #"If `value` is `"true"`, `"t"`, `"yes"`, or `"y"`, `true` is returned."#, - #"If `value` is `"false"`, `"f"`, `"no"`, `"n"`, or `"0"`, `false` is returned."#, - #"If `value` is `0.0`, `false` is returned, otherwise `true` is returned."#, - #"If `value` is `0`, `false` is returned, otherwise `true` is returned."#, - #"If `value` is `null`, `false` is returned."#, - #"If `value` is a Boolean, it's returned unchanged."#, - ] - } - - examples: [ - { - title: "Coerce to a Boolean (string)" - source: """ - to_bool!("yes") - """ - return: true - }, - { - title: "Coerce to a Boolean (float)" - source: """ - to_bool(0.0) - """ - return: false - }, - { - title: "Coerce to a Boolean (int)" - source: """ - to_bool(0) - """ - return: false - }, - { - title: "Coerce to a Boolean (null)" - source: """ - to_bool(null) - """ - return: 
false - }, - { - title: "Coerce to a Boolean (Boolean)" - source: """ - to_bool(true) - """ - return: true - }, - ] -} diff --git a/website/cue/reference/remap/functions/to_float.cue b/website/cue/reference/remap/functions/to_float.cue deleted file mode 100644 index fbbd18f16e0bc..0000000000000 --- a/website/cue/reference/remap/functions/to_float.cue +++ /dev/null @@ -1,48 +0,0 @@ -package metadata - -remap: functions: to_float: { - category: "Coerce" - description: """ - Coerces the `value` into a float. - """ - arguments: [ - { - name: "value" - description: """ - The value to convert to a float. Must be convertible to a float, otherwise an error is raised. - """ - required: true - type: ["integer", "float", "boolean", "string", "timestamp"] - }, - ] - internal_failure_reasons: [ - "`value` is not a supported float representation.", - ] - return: { - types: ["float"] - rules: [ - "If `value` is a float, it will be returned as-is.", - "If `value` is an integer, it will be returned as as a float.", - "If `value` is a string, it must be the string representation of an float or else an error is raised.", - "If `value` is a boolean, `0.0` is returned for `false` and `1.0` is returned for `true`.", - "If `value` is a timestamp, a [Unix timestamp](\(urls.unix_timestamp)) with fractional seconds is returned.", - ] - } - - examples: [ - { - title: "Coerce to a float" - source: """ - to_float!("3.145") - """ - return: 3.145 - }, - { - title: "Coerce to a float (timestamp)" - source: """ - to_float(t'2020-12-30T22:20:53.824727Z') - """ - return: 1609366853.824727 - }, - ] -} diff --git a/website/cue/reference/remap/functions/to_int.cue b/website/cue/reference/remap/functions/to_int.cue deleted file mode 100644 index 688c59704c0a2..0000000000000 --- a/website/cue/reference/remap/functions/to_int.cue +++ /dev/null @@ -1,51 +0,0 @@ -package metadata - -remap: functions: to_int: { - category: "Coerce" - description: """ - Coerces the `value` into an integer. 
- """ - - arguments: [ - { - name: "value" - description: """ - The value to convert to an integer. - """ - required: true - type: ["integer", "float", "boolean", "string", "timestamp", "null"] - }, - ] - internal_failure_reasons: [ - "`value` is a string but the text is not an integer.", - "`value` is not a string, int, or timestamp.", - ] - return: { - types: ["integer"] - rules: [ - "If `value` is an integer, it will be returned as-is.", - "If `value` is a float, it will be truncated to its integer portion.", - "If `value` is a string, it must be the string representation of an integer or else an error is raised.", - "If `value` is a boolean, `0` is returned for `false` and `1` is returned for `true`.", - "If `value` is a timestamp, a [Unix timestamp](\(urls.unix_timestamp)) (in seconds) is returned.", - "If `value` is null, `0` is returned.", - ] - } - - examples: [ - { - title: "Coerce to an int (string)" - source: """ - to_int!("2") - """ - return: 2 - }, - { - title: "Coerce to an int (timestamp)" - source: """ - to_int(t'2020-12-30T22:20:53.824727Z') - """ - return: 1609366853 - }, - ] -} diff --git a/website/cue/reference/remap/functions/to_regex.cue b/website/cue/reference/remap/functions/to_regex.cue deleted file mode 100644 index 9497da6d46508..0000000000000 --- a/website/cue/reference/remap/functions/to_regex.cue +++ /dev/null @@ -1,37 +0,0 @@ -package metadata - -remap: functions: to_regex: { - category: "Coerce" - description: """ - Coerces the `value` into a regex. - """ - notices: ["Compiling a regular expression is an expensive operation and can limit Vector throughput. Don't use this function unless you are absolutely sure there is no other way!"] - - arguments: [ - { - name: "value" - description: "The value to convert to a regex." 
- required: true - type: ["string"] - }, - ] - internal_failure_reasons: [ - "`value` is not a string.", - ] - return: { - types: ["regex"] - rules: [ - #"If `value` is a string that contains a valid regex, returns the regex constructed with this string."#, - ] - } - - examples: [ - { - title: "Coerce to a regex" - source: #""" - to_regex("^foo$") ?? r'' - """# - return: "^foo$" - }, - ] -} diff --git a/website/cue/reference/remap/functions/to_string.cue b/website/cue/reference/remap/functions/to_string.cue deleted file mode 100644 index 4a8104d1bd32b..0000000000000 --- a/website/cue/reference/remap/functions/to_string.cue +++ /dev/null @@ -1,53 +0,0 @@ -package metadata - -remap: functions: to_string: { - category: "Coerce" - description: """ - Coerces the `value` into a string. - """ - - arguments: [ - { - name: "value" - description: "The value to convert to a string." - required: true - type: ["integer", "float", "boolean", "string", "timestamp", "null"] - }, - ] - internal_failure_reasons: [ - "`value` is not an integer, float, boolean, string, timestamp, or null.", - ] - return: { - types: ["string"] - rules: [ - #"If `value` is an integer or float, returns the string representation."#, - #"If `value` is a boolean, returns `"true"` or `"false"`."#, - #"If `value` is a timestamp, returns an [RFC 3339](\(urls.rfc3339)) representation."#, - #"If `value` is a null, returns `""`."#, - ] - } - - examples: [ - { - title: "Coerce to a string (Boolean)" - source: #""" - to_string(true) - """# - return: "true" - }, - { - title: "Coerce to a string (int)" - source: #""" - to_string(52) - """# - return: "52" - }, - { - title: "Coerce to a string (float)" - source: #""" - to_string(52.2) - """# - return: "52.2" - }, - ] -} diff --git a/website/cue/reference/remap/functions/to_syslog_facility.cue b/website/cue/reference/remap/functions/to_syslog_facility.cue deleted file mode 100644 index 01cfc89ea909a..0000000000000 --- 
a/website/cue/reference/remap/functions/to_syslog_facility.cue +++ /dev/null @@ -1,32 +0,0 @@ -package metadata - -remap: functions: to_syslog_facility: { - category: "Convert" - description: """ - Converts the `value`, a Syslog [facility code](\(urls.syslog_facility)), into its corresponding - Syslog keyword. For example, `0` into `"kern"`, `1` into `"user"`, etc. - """ - - arguments: [ - { - name: "value" - description: "The facility code." - required: true - type: ["integer"] - }, - ] - internal_failure_reasons: [ - "`value` is not a valid Syslog [facility code](\(urls.syslog_facility)).", - ] - return: types: ["string"] - - examples: [ - { - title: "Coerce to a Syslog facility" - source: """ - to_syslog_facility!(4) - """ - return: "auth" - }, - ] -} diff --git a/website/cue/reference/remap/functions/to_syslog_facility_code.cue b/website/cue/reference/remap/functions/to_syslog_facility_code.cue deleted file mode 100644 index 718c82f82ea00..0000000000000 --- a/website/cue/reference/remap/functions/to_syslog_facility_code.cue +++ /dev/null @@ -1,32 +0,0 @@ -package metadata - -remap: functions: to_syslog_facility_code: { - category: "Convert" - description: """ - Converts the `value`, a Syslog [facility keyword](\(urls.syslog_facility)), into a Syslog integer - facility code (`0` to `23`). - """ - - arguments: [ - { - name: "value" - description: "The Syslog facility keyword to convert." 
- required: true - type: ["string"] - }, - ] - internal_failure_reasons: [ - "`value` is not a valid Syslog facility keyword.", - ] - return: types: ["integer"] - - examples: [ - { - title: "Coerce to Syslog facility code" - source: """ - to_syslog_facility_code!("authpriv") - """ - return: 10 - }, - ] -} diff --git a/website/cue/reference/remap/functions/to_syslog_level.cue b/website/cue/reference/remap/functions/to_syslog_level.cue deleted file mode 100644 index c39f69e93d02c..0000000000000 --- a/website/cue/reference/remap/functions/to_syslog_level.cue +++ /dev/null @@ -1,32 +0,0 @@ -package metadata - -remap: functions: to_syslog_level: { - category: "Convert" - description: """ - Converts the `value`, a Syslog [severity level](\(urls.syslog_levels)), into its corresponding keyword, - i.e. 0 into `"emerg"`, 1 into `"alert"`, etc. - """ - - arguments: [ - { - name: "value" - description: "The severity level." - required: true - type: ["integer"] - }, - ] - internal_failure_reasons: [ - "`value` isn't a valid Syslog [severity level](\(urls.syslog_levels)).", - ] - return: types: ["string"] - - examples: [ - { - title: "Coerce to a Syslog level" - source: """ - to_syslog_level!(5) - """ - return: "notice" - }, - ] -} diff --git a/website/cue/reference/remap/functions/to_syslog_severity.cue b/website/cue/reference/remap/functions/to_syslog_severity.cue deleted file mode 100644 index a50986a9ecbc5..0000000000000 --- a/website/cue/reference/remap/functions/to_syslog_severity.cue +++ /dev/null @@ -1,37 +0,0 @@ -package metadata - -remap: functions: to_syslog_severity: { - category: "Convert" - description: """ - Converts the `value`, a Syslog [log level keyword](\(urls.syslog_levels)), into a Syslog integer - severity level (`0` to `7`). - """ - - arguments: [ - { - name: "value" - description: "The Syslog level keyword to convert." 
- required: true - type: ["string"] - }, - ] - internal_failure_reasons: [ - "`value` is not a valid Syslog level keyword.", - ] - return: { - types: ["integer"] - rules: [ - "The now-deprecated keywords `panic`, `error`, and `warn` are converted to `0`, `3`, and `4` respectively.", - ] - } - - examples: [ - { - title: "Coerce to Syslog severity" - source: """ - to_syslog_severity!("alert") - """ - return: 1 - }, - ] -} diff --git a/website/cue/reference/remap/functions/to_unix_timestamp.cue b/website/cue/reference/remap/functions/to_unix_timestamp.cue deleted file mode 100644 index 2a622576f2678..0000000000000 --- a/website/cue/reference/remap/functions/to_unix_timestamp.cue +++ /dev/null @@ -1,59 +0,0 @@ -package metadata - -remap: functions: to_unix_timestamp: { - category: "Convert" - description: """ - Converts the `value` timestamp into a [Unix timestamp](\(urls.unix_timestamp)). - - Returns the number of seconds since the Unix epoch by default. To return the number in milliseconds or nanoseconds, set the `unit` argument to `milliseconds` or `nanoseconds`. - """ - - arguments: [ - { - name: "value" - description: "The timestamp to convert into a Unix timestamp." - required: true - type: ["timestamp"] - }, - { - name: "unit" - description: "The time unit." - type: ["string"] - required: false - enum: { - seconds: "Express Unix time in seconds" - milliseconds: "Express Unix time in milliseconds" - nanoseconds: "Express Unix time in nanoseconds" - } - default: "seconds" - }, - ] - internal_failure_reasons: [ - "`value` cannot be represented in nanoseconds. 
Result is too large or too small for a 64 bit integer.", - ] - return: types: ["integer"] - - examples: [ - { - title: "Convert to a Unix timestamp (seconds)" - source: #""" - to_unix_timestamp(t'2021-01-01T00:00:00+00:00') - """# - return: 1609459200 - }, - { - title: "Convert to a Unix timestamp (milliseconds)" - source: #""" - to_unix_timestamp(t'2021-01-01T00:00:00Z', unit: "milliseconds") - """# - return: 1609459200000 - }, - { - title: "Convert to a Unix timestamp (nanoseconds)" - source: #""" - to_unix_timestamp(t'2021-01-01T00:00:00Z', unit: "nanoseconds") - """# - return: 1609459200000000000 - }, - ] -} diff --git a/website/cue/reference/remap/functions/truncate.cue b/website/cue/reference/remap/functions/truncate.cue deleted file mode 100644 index 233fb95ab4bc4..0000000000000 --- a/website/cue/reference/remap/functions/truncate.cue +++ /dev/null @@ -1,66 +0,0 @@ -package metadata - -remap: functions: truncate: { - category: "String" - description: """ - Truncates the `value` string up to the `limit` number of characters. - """ - - arguments: [ - { - name: "value" - description: "The string to truncate." - required: true - type: ["string"] - }, - { - name: "limit" - description: "The number of characters to truncate the string after." - required: true - type: ["integer", "float"] - }, - { - name: "ellipsis" - description: """ - This argument is deprecated. An ellipsis (`...`) is appended if the parameter is set to `true` _and_ the `value` string - is truncated because it exceeded the `limit`. - """ - required: false - type: ["boolean"] - }, - { - name: "suffix" - description: """ - A custom suffix (`...`) is appended to truncated strings. - If `ellipsis` is set to `true`, this parameter is ignored for backwards compatibility. 
- """ - required: false - type: ["string"] - }, - ] - internal_failure_reasons: [] - return: { - types: ["string"] - rules: [ - "The string is returned unchanged its length is less than `limit`.", - "If `ellipsis` is `true`, then an ellipsis (`...`) is appended to the string (beyond the specified `limit`).", - ] - } - - examples: [ - { - title: "Truncate a string" - source: #""" - truncate("A rather long sentence.", limit: 11, suffix: "...") - """# - return: "A rather lo..." - }, - { - title: "Truncate a string" - source: #""" - truncate("A rather long sentence.", limit: 11, suffix: "[TRUNCATED]") - """# - return: "A rather lo[TRUNCATED]" - }, - ] -} diff --git a/website/cue/reference/remap/functions/unflatten.cue b/website/cue/reference/remap/functions/unflatten.cue deleted file mode 100644 index 67ca69aa17010..0000000000000 --- a/website/cue/reference/remap/functions/unflatten.cue +++ /dev/null @@ -1,111 +0,0 @@ -package metadata - -remap: functions: unflatten: { - category: "Enumerate" - description: #""" - Unflattens the `value` into a nested representation. - """# - - arguments: [ - { - name: "value" - description: "The array or object to unflatten." - required: true - type: ["object"] - }, - { - name: "separator" - description: "The separator to split flattened keys." - required: false - default: "." - type: ["string"] - }, - { - name: "recursive" - description: "Whether to recursively unflatten the object values." 
- required: false - default: "true" - type: ["boolean"] - }, - ] - internal_failure_reasons: [] - return: types: ["object"] - - examples: [ - { - title: "Unflatten" - source: #""" - unflatten({ - "foo.bar.baz": true, - "foo.bar.qux": false, - "foo.quux": 42 - }) - """# - return: { - "foo": { - "bar": { - "baz": true - "qux": false - } - "quux": 42 - } - } - }, - { - title: "Unflatten recursively" - source: #""" - unflatten({ - "flattened.parent": { - "foo.bar": true, - "foo.baz": false - } - }) - """# - return: { - "flattened": { - "parent": { - "foo": { - "bar": true - "baz": false - } - } - } - } - }, - { - title: "Unflatten non-recursively" - source: #""" - unflatten({ - "flattened.parent": { - "foo.bar": true, - "foo.baz": false - } - }, recursive: false) - """# - return: { - "flattened": { - "parent": { - "foo.bar": true - "foo.baz": false - } - } - } - }, - { - title: "Ignore inconsistent keys values" - source: #""" - unflatten({ - "a": 3, - "a.b": 2, - "a.c": 4 - }) - """# - return: { - "a": { - "b": 2 - "c": 4 - } - } - }, - ] -} diff --git a/website/cue/reference/remap/functions/unique.cue b/website/cue/reference/remap/functions/unique.cue deleted file mode 100644 index 6dcb86389ca49..0000000000000 --- a/website/cue/reference/remap/functions/unique.cue +++ /dev/null @@ -1,33 +0,0 @@ -package metadata - -remap: functions: unique: { - category: "Enumerate" - description: #""" - Returns the unique values for an array. - - The first occurrence of each element is kept. - """# - - arguments: [ - { - name: "value" - description: "The array to return unique elements from." 
- required: true - type: ["array"] - }, - ] - internal_failure_reasons: [] - return: { - types: ["array"] - } - - examples: [ - { - title: "Unique" - source: #""" - unique(["foo", "bar", "foo", "baz"]) - """# - return: ["foo", "bar", "baz"] - }, - ] -} diff --git a/website/cue/reference/remap/functions/unnest.cue b/website/cue/reference/remap/functions/unnest.cue deleted file mode 100644 index 3a1592a646f26..0000000000000 --- a/website/cue/reference/remap/functions/unnest.cue +++ /dev/null @@ -1,79 +0,0 @@ -package metadata - -remap: functions: unnest: { - category: "Object" - description: """ - Unnest an array field from an object to create an array of objects using that field; keeping all other fields. - - Assigning the array result of this to `.` results in multiple events being emitted from `remap`. See the - [`remap` transform docs](\(urls.vector_remap_transform_multiple)) for more details. - - This is also referred to as `explode` in some languages. - """ - - arguments: [ - { - name: "path" - description: "The path of the field to unnest." - required: true - type: ["path"] - }, - ] - internal_failure_reasons: [ - "The field path referred to is not an array.", - ] - notices: [] - return: { - types: ["array"] - rules: [ - "Returns an array of objects that matches the original object, but each with the specified path replaced with a single element from the original path.", - ] - } - - examples: [ - { - title: "Unnest an array field" - input: log: { - hostname: "localhost" - messages: [ - "message 1", - "message 2", - ] - } - source: ". = unnest!(.messages)" - output: [ - {log: { - hostname: "localhost" - messages: "message 1" - }}, - {log: { - hostname: "localhost" - messages: "message 2" - }}, - ] - }, - { - title: "Unnest nested an array field" - input: log: { - hostname: "localhost" - event: { - messages: [ - "message 1", - "message 2", - ] - } - } - source: ". 
= unnest!(.event.messages)" - output: [ - {log: { - hostname: "localhost" - event: messages: "message 1" - }}, - {log: { - hostname: "localhost" - event: messages: "message 2" - }}, - ] - }, - ] -} diff --git a/website/cue/reference/remap/functions/upcase.cue b/website/cue/reference/remap/functions/upcase.cue deleted file mode 100644 index 69f2f339cfcef..0000000000000 --- a/website/cue/reference/remap/functions/upcase.cue +++ /dev/null @@ -1,30 +0,0 @@ -package metadata - -remap: functions: upcase: { - description: """ - Upcases `value`, where upcase is defined according to the Unicode Derived Core Property - Uppercase. - """ - - arguments: [ - { - name: "value" - description: "The string to convert to uppercase." - required: true - type: ["string"] - }, - ] - internal_failure_reasons: [] - return: types: ["string"] - category: "String" - - examples: [ - { - title: "Upcase a string" - source: #""" - upcase("Hello, World!") - """# - return: "HELLO, WORLD!" - }, - ] -} diff --git a/website/cue/reference/remap/functions/uuid_from_friendly_id.cue b/website/cue/reference/remap/functions/uuid_from_friendly_id.cue deleted file mode 100644 index b017e918bebfd..0000000000000 --- a/website/cue/reference/remap/functions/uuid_from_friendly_id.cue +++ /dev/null @@ -1,32 +0,0 @@ -package metadata - -remap: functions: uuid_from_friendly_id: { - category: "Random" - description: """ - Convert a Friendly ID (base62 encoding a 128-bit word) to a UUID. 
- """ - - arguments: [ - { - name: "value" - description: "A string that is a Friendly ID" - required: true - type: ["timestamp"] - }, - ] - internal_failure_reasons: [ - "`value` is a string but the text uses characters outside of class [0-9A-Za-z].", - "`value` is a base62 encoding of an integer, but the integer is greater than or equal to 2^128.", - ] - return: types: ["string"] - - examples: [ - { - title: "Convert a Friendly ID to a UUID" - source: #""" - uuid_from_friendly_id!("3s87yEvnmkiPBMHsj8bwwc") - """# - return: "7f41deed-d5e2-8b5e-7a13-ab4ff93cfad2" - }, - ] -} diff --git a/website/cue/reference/remap/functions/uuid_v4.cue b/website/cue/reference/remap/functions/uuid_v4.cue deleted file mode 100644 index c6c4a7738fed0..0000000000000 --- a/website/cue/reference/remap/functions/uuid_v4.cue +++ /dev/null @@ -1,22 +0,0 @@ -package metadata - -remap: functions: uuid_v4: { - category: "Random" - description: """ - Generates a random [UUIDv4](\(urls.uuidv4)) string. - """ - - arguments: [] - internal_failure_reasons: [] - return: types: ["string"] - - examples: [ - { - title: "Create a UUIDv4" - source: #""" - uuid_v4() - """# - return: "1d262f4f-199b-458d-879f-05fd0a5f0683" - }, - ] -} diff --git a/website/cue/reference/remap/functions/uuid_v7.cue b/website/cue/reference/remap/functions/uuid_v7.cue deleted file mode 100644 index 8b6be2040e871..0000000000000 --- a/website/cue/reference/remap/functions/uuid_v7.cue +++ /dev/null @@ -1,44 +0,0 @@ -package metadata - -remap: functions: uuid_v7: { - category: "Random" - description: """ - Generates a random [UUIDv7](\(urls.uuidv7)) string. - """ - - arguments: [ - { - name: "timestamp" - description: "The timestamp used to generate the UUIDv7." 
- required: false - type: ["timestamp"] - default: "`now()`" - }, - ] - internal_failure_reasons: [] - return: types: ["string"] - - examples: [ - { - title: "Create a UUIDv7 with implicit `now()`" - source: #""" - uuid_v7() - """# - return: "06338364-8305-7b74-8000-de4963503139" - }, - { - title: "Create a UUIDv7 with explicit `now()`" - source: #""" - uuid_v7(now()) - """# - return: "018e29b3-0bea-7f78-8af3-d32ccb1b93c1" - }, - { - title: "Create a UUIDv7 with custom timestamp" - source: #""" - uuid_v7(t'2020-12-30T22:20:53.824727Z') - """# - return: "0176b5bd-5d19-7394-bb60-c21028c6152b" - }, - ] -} diff --git a/website/cue/reference/remap/functions/validate_json_schema.cue b/website/cue/reference/remap/functions/validate_json_schema.cue deleted file mode 100644 index 314d6fdee8bca..0000000000000 --- a/website/cue/reference/remap/functions/validate_json_schema.cue +++ /dev/null @@ -1,81 +0,0 @@ -package metadata - -remap: functions: validate_json_schema: { - category: "Type" - description: """ - Check if `value` conforms to a JSON Schema definition. This function validates a JSON payload against a JSON Schema definition. It can be used to ensure that the data structure and types in `value` match the expectations defined in `schema_definition`. - """ - notices: [ - """ - This function uses a compiled schema cache. The first time it is called with a specific `schema_definition`, it will compile the schema and cache it for subsequent calls. This improves performance when validating multiple values against the same schema. - The cache implementation is fairly naive and does not support refreshing the schema if it changes. If you update the schema definition file, you must restart Vector to clear the cache. 
- """, - ] - arguments: [ - { - name: "value" - description: #"The value to check if it conforms to the JSON schema definition."# - required: true - type: ["any"] - }, - { - name: "schema_definition" - description: #"The location (path) of the JSON Schema definition."# - required: true - type: ["any"] - }, - { - name: "ignore_unknown_formats" - description: #"Unknown formats can be silently ignored by setting this to `true` and validation continues without failing due to those fields."# - required: false - type: ["boolean"] - }, - - ] - internal_failure_reasons: [ - "`value` is not a valid JSON Schema payload.", - "`value` contains custom format declarations and `ignore_unknown_formats` has not been set to `true`.", - "`schema_definition` is not a valid JSON Schema definition.", - "`schema_definition` file does not exist.", - ] - return: { - types: ["boolean"] - rules: [ - #"Returns `true` if `value` conforms to the JSON Schema definition."#, - #"Returns `false` if `value` does not conform to the JSON Schema definition."#, - ] - } - - examples: [ - { - title: "Payload contains a valid email." - source: """ - validate_json_schema!(s'{ "productUser": "valid@email.com" }', "resources/json-schema_definition.json", false) - """ - return: true - }, - { - title: "Payload contains an invalid email." - source: """ - ok, _err = validate_json_schema(s'{ "productUser": "invalidEmail" }', "resources/json-schema_definition.json", false) - ok - """ - return: false - }, - { - title: "Payload contains a custom format declaration." - source: """ - ok, _err = validate_json_schema(s'{ "productUser": "a-custom-formatted-string" }', "resources/json-schema_definition.json", false) - ok - """ - return: false - }, - { - title: "Payload contains a custom format declaration, with ignore_unknown_formats set to true." 
- source: """ - validate_json_schema!(s'{ "productUser": "valid@email.com" }', "resources/json-schema_definition.json", true) - """ - return: true - }, - ] -} diff --git a/website/cue/reference/remap/functions/values.cue b/website/cue/reference/remap/functions/values.cue deleted file mode 100644 index 78586579b159d..0000000000000 --- a/website/cue/reference/remap/functions/values.cue +++ /dev/null @@ -1,37 +0,0 @@ -package metadata - -remap: functions: values: { - category: "Enumerate" - description: #""" - Returns the values from the object passed into the function. - """# - - arguments: [ - { - name: "value" - description: "The object to extract values from." - required: true - type: ["object"] - }, - ] - internal_failure_reasons: [] - return: { - types: ["array"] - rules: [ - #"Returns an array of all the values."#, - ] - } - examples: [ - { - title: "Get values from the object" - input: log: { - "key1": "val1" - "key2": "val2" - } - source: #""" - values({"key1": "val1", "key2": "val2"}) - """# - return: ["val1", "val2"] - }, - ] -} diff --git a/website/cue/reference/remap/functions/xxhash.cue b/website/cue/reference/remap/functions/xxhash.cue deleted file mode 100644 index f420bf9f80410..0000000000000 --- a/website/cue/reference/remap/functions/xxhash.cue +++ /dev/null @@ -1,65 +0,0 @@ -package metadata - -remap: functions: xxhash: { - category: "Checksum" - description: """ - Calculates a [xxHash](\(urls.xxhash_rust)) hash of the `value`. - **Note**: Due to limitations in the underlying VRL data types, this function converts the unsigned 64-bit integer hash result to a signed 64-bit integer. Results higher than the signed 64-bit integer maximum value wrap around to negative values. For the XXH3-128 hash algorithm, values are returned as a string. - """ - - arguments: [ - { - name: "value" - description: "The string to calculate the hash for." - required: true - type: ["string"] - }, - { - name: "variant" - description: "The xxHash hashing algorithm to use." 
- required: false - type: ["string"] - default: "XXH32" - }, - ] - internal_failure_reasons: [] - return: types: ["integer", "string"] - - examples: [ - { - title: "Calculate a hash using the default (XXH32) algorithm" - source: #""" - xxhash("foo") - """# - return: 3792637401 - }, - { - title: "Calculate a hash using the XXH32 algorithm" - source: #""" - xxhash("foo", "XXH32") - """# - return: 3792637401 - }, - { - title: "Calculate a hash using the XXH64 algorithm" - source: #""" - xxhash("foo", "XXH64") - """# - return: 3728699739546630719 - }, - { - title: "Calculate a hash using the XXH3-64 algorithm" - source: #""" - xxhash("foo", "XXH3-64") - """# - return: -6093828362558603894 - }, - { - title: "Calculate a hash using the XXH3-128 algorithm" - source: #""" - xxhash("foo", "XXH3-128") - """# - return: "161745101148472925293886522910304009610" - }, - ] -} diff --git a/website/cue/reference/remap/functions/zip.cue b/website/cue/reference/remap/functions/zip.cue deleted file mode 100644 index 90f5e7927596f..0000000000000 --- a/website/cue/reference/remap/functions/zip.cue +++ /dev/null @@ -1,55 +0,0 @@ -package metadata - -remap: functions: zip: { - category: "Array" - description: """ - Iterate over several arrays in parallel, producing a new array containing arrays of items from each source. - The resulting array will be as long as the shortest input array, with all the remaining elements dropped. - This function is modeled from the `zip` function [in Python](https://docs.python.org/3/library/functions.html#zip), - but similar methods can be found in [Ruby](https://docs.ruby-lang.org/en/master/Array.html#method-i-zip) - and [Rust](https://doc.rust-lang.org/stable/std/iter/trait.Iterator.html#method.zip). - - If a single parameter is given, it must contain an array of all the input arrays. - """ - - arguments: [ - { - name: "array_0" - description: "The first array of elements, or the array of input arrays if no other parameter is present." 
- required: true - type: ["array"] - }, - { - name: "array_1" - description: "The second array of elements. If not present, the first parameter contains all the arrays." - required: false - type: ["array"] - }, - ] - internal_failure_reasons: [ - "`array_0` and `array_1` must be arrays.", - ] - return: { - types: ["array"] - rules: [ - "`zip` is considered fallible if any of the parameters is not an array, or if only the first parameter is present and it is not an array of arrays.", - ] - } - - examples: [ - { - title: "Merge two arrays" - source: #""" - zip([1, 2, 3], [4, 5, 6, 7]) - """# - return: [[1, 4], [2, 5], [3, 6]] - }, - { - title: "Merge three arrays" - source: #""" - zip([[1, 2], [3, 4], [5, 6]]) - """# - return: [[1, 3, 5], [2, 4, 6]] - }, - ] -} diff --git a/website/cue/reference/versions.cue b/website/cue/reference/versions.cue index 6c60ea659b5b3..ae51453c8f728 100644 --- a/website/cue/reference/versions.cue +++ b/website/cue/reference/versions.cue @@ -2,6 +2,7 @@ package metadata // This has to be maintained manually because there's currently no way to sort versions programmatically versions: [string, ...string] & [ + "0.54.0", "0.53.0", "0.52.0", "0.51.1", diff --git a/website/layouts/partials/data.html b/website/layouts/partials/data.html index 1c3d1b728a4d2..9252b6af3f414 100644 --- a/website/layouts/partials/data.html +++ b/website/layouts/partials/data.html @@ -1788,7 +1788,7 @@

Input
- {{ template "code" .log }} + {{ template "code" . }}
{{ end }} @@ -1810,6 +1810,16 @@

{{ end }} + + {{ if isset .ctx "raises" }} +
+ Raises + +
+ {{ template "code" .ctx.raises }} +
+
+ {{ end }} {{ end }} diff --git a/website/layouts/partials/docs/sidebar.html b/website/layouts/partials/docs/sidebar.html index bf44a644da000..f9c81ed763cd8 100644 --- a/website/layouts/partials/docs/sidebar.html +++ b/website/layouts/partials/docs/sidebar.html @@ -47,7 +47,7 @@ {{ define "subsection-group" }} {{ $here := .here }} {{ $section := .section }} -{{ $open := .ctx.IsAncestor $section }} +{{ $open := or (.ctx.IsAncestor $section) (eq .ctx.RelPermalink $here) }}
{{ template "link" (dict "here" $here "url" .ctx.RelPermalink "title" (.ctx.Params.short | default .ctx.Title)) }} diff --git a/website/layouts/partials/heading.html b/website/layouts/partials/heading.html index 84f361f360851..853e0ffd9a5bb 100644 --- a/website/layouts/partials/heading.html +++ b/website/layouts/partials/heading.html @@ -1,5 +1,5 @@ {{ $id := (.id | default .text) | urlize }} -{{ $text := .text | markdownify }} +{{ $text := cond .mono .text (.text | markdownify) }} {{ $level := .level | default 2 }} {{ $iconSizes := dict "1" "7" "2" "6" "3" "5" "4" "4" "5" "3" "6" "3" }} {{ $n := index $iconSizes ($level | string) }} diff --git a/website/layouts/partials/meta.html b/website/layouts/partials/meta.html index 7d654646ed1d8..b85d96d6233bd 100644 --- a/website/layouts/partials/meta.html +++ b/website/layouts/partials/meta.html @@ -1,7 +1,7 @@ {{ $url := .Permalink }} {{ $favicon := site.Params.favicon | absURL }} {{ $desc := (cond .IsHome site.Params.description .Description) | markdownify | plainify }} -{{ $author := site.Author }} +{{ $author := site.Params.author }} {{ $img := site.Params.site_logo | absURL }} {{ $imgAlt := printf "Logo for %s" site.Title }} {{ $twitter := printf "@%s" site.Params.social.twitter_handle }} diff --git a/website/layouts/shortcodes/administration/logs.html b/website/layouts/shortcodes/administration/logs.html deleted file mode 100644 index b5bc0711ea0eb..0000000000000 --- a/website/layouts/shortcodes/administration/logs.html +++ /dev/null @@ -1,90 +0,0 @@ -{{ $ui := site.Data.docs.administration.ui.management }} -{{ $latest := index site.Data.docs.versions 0 }} -
- {{ partial "ui/selector.html" (dict "name" "Platform" "items" $ui.family_names "var" "platform") }} - - {{ range $ui.families }} - {{ $var := printf "%s_interface" .name }} -
- {{ partial "ui/selector.html" (dict "name" "Interface" "items" .interface_names "var" $var) }} -
- {{ end }} -
- -
- {{ range $ui.families }} - {{ $highlighter := .highlighter }} -
- {{ $var := printf "%s_interface" .name }} - {{ range .interfaces }} - {{ $cmds := .manage }} -
-
- - Installer: {{ .title }} - -
- -
- - Variables - - - {{ with .variables.variants }} - - - variant - - - - {{ range . }} - {{ partial "badge.html" (dict "word" . "color" "gray") }} - {{ end }} - - - {{ end }} - - - - config_format - - - {{ range .variables.config_formats }} - {{ partial "badge.html" (dict "word" . "color" "gray") }} - {{ end }} - - -
- - - {{ with .manage }} -
- {{ range slice "start" "stop" "reload" "restart" }} - {{ $cmd := index $cmds . }} - - {{ with $cmd.info }} -
- {{ . | $.Page.RenderString }} -
- {{ end }} - - {{ if $cmd.command }} - {{ $cmd = $cmd.command | replaceRE "{version}" $latest }} -
- - {{ . | title }} - - -
- {{ highlight $cmd $highlighter "" }} -
-
- {{ end }} - {{ end }} -
- {{ end }} -
- {{ end }} -
- {{ end }} -
\ No newline at end of file diff --git a/website/layouts/shortcodes/administration/manage.html b/website/layouts/shortcodes/administration/manage.html deleted file mode 100644 index f73b0804d297a..0000000000000 --- a/website/layouts/shortcodes/administration/manage.html +++ /dev/null @@ -1,85 +0,0 @@ -{{ $ui := site.Data.docs.administration.ui.management }} -{{ $latest := index site.Data.docs.versions 0 }} -
- {{ partial "ui/selector.html" (dict "name" "Platform" "items" $ui.family_names "var" "platform") }} - - {{ range $ui.families }} - {{ $var := printf "%s_interface" .name }} -
- {{ partial "ui/selector.html" (dict "name" "Interface" "items" .interface_names "var" $var) }} -
- {{ end }} -
- -
- {{ range $ui.families }} - {{ $highlighter := .highlighter }} -
- {{ $var := printf "%s_interface" .name }} - {{ range .interfaces }} - {{ $cmds := .manage }} -
-
- - Installer: {{ .title }} - -
- -
- - Variables - - - {{ with .variables.variants }} - - - variant - - - - {{ range . }} - {{ partial "badge.html" (dict "word" . "color" "gray") }} - {{ end }} - - - {{ end }} - - - - config_format - - - {{ range .variables.config_formats }} - {{ partial "badge.html" (dict "word" . "color" "gray") }} - {{ end }} - - -
- - - {{ with .manage }} -
- {{ range slice "start" "stop" "reload" "restart" }} - {{ $cmd := index $cmds . }} - - {{ if $cmd.command }} - {{ $cmd = $cmd.command | replaceRE "{version}" $latest }} - -
- - {{ . | title }} - - -
- {{ highlight $cmd $highlighter "" }} -
-
- {{ end }} - {{ end }} -
- {{ end }} -
- {{ end }} -
- {{ end }} -
\ No newline at end of file diff --git a/website/layouts/shortcodes/administration/upgrading.html b/website/layouts/shortcodes/administration/upgrading.html deleted file mode 100644 index d38b348941c7d..0000000000000 --- a/website/layouts/shortcodes/administration/upgrading.html +++ /dev/null @@ -1,55 +0,0 @@ -{{ $ui := site.Data.docs.administration.ui.management }} -{{ $latest := index site.Data.docs.versions 0 }} -
- {{ partial "ui/selector.html" (dict "name" "Platform" "items" $ui.family_names "var" "platform") }} - - {{ range $ui.families }} - {{ $var := printf "%s_interface" .name }} -
- {{ partial "ui/selector.html" (dict "name" "Interface" "items" .interface_names "var" $var) }} -
- {{ end }} -
- -
- {{ range $ui.families }} - {{ $highlighter := .highlighter }} -
- {{ $var := printf "%s_interface" .name }} - {{ range .interfaces }} - {{ $cmds := .manage }} -
-
- - Installer: {{ .title }} - -
- - {{ with .manage }} -
- {{ range slice "upgrade" }} - {{ $cmd := index $cmds . }} - - {{ if $cmd.command }} - {{ $cmd = $cmd.command | replaceRE "{version}" $latest }} - -
- - {{ . | title }} - - -
- {{ highlight $cmd $highlighter "" }} -
-
- {{ else }} - No upgrade command specified. - {{ end }} - {{ end }} -
- {{ end }} -
- {{ end }} -
- {{ end }} -
\ No newline at end of file diff --git a/website/layouts/shortcodes/api/config.html b/website/layouts/shortcodes/api/config.html deleted file mode 100644 index 36a232a82de21..0000000000000 --- a/website/layouts/shortcodes/api/config.html +++ /dev/null @@ -1,2 +0,0 @@ -{{ $config := site.Data.docs.api.configuration }} -{{ partial "data.html" (dict "component_config" $config) }} \ No newline at end of file diff --git a/website/layouts/shortcodes/cli/interface.html b/website/layouts/shortcodes/cli/interface.html deleted file mode 100644 index 74064f93b0c5c..0000000000000 --- a/website/layouts/shortcodes/cli/interface.html +++ /dev/null @@ -1,9 +0,0 @@ -{{ $words := slice "flags" "options" "subcommand" "args" }} -
- - vector - {{ range $words }} - {{{ . }}} - {{ end }} - -
\ No newline at end of file diff --git a/website/layouts/shortcodes/config-cross-links.html b/website/layouts/shortcodes/config-cross-links.html new file mode 100644 index 0000000000000..c7f111632fac8 --- /dev/null +++ b/website/layouts/shortcodes/config-cross-links.html @@ -0,0 +1,18 @@ +{{ $currentGroup := .Get "group" -}} + +{{- /* Define all configuration pages in one place */ -}} +{{ $configPages := slice + (dict "group" "global_options" "url" "/docs/reference/configuration/global-options/" "title" "Global Options" "desc" "Global settings like data directories and timezone") + (dict "group" "pipeline_components" "url" "/docs/reference/configuration/pipeline-components/" "title" "Pipeline Components" "desc" "Configure sources, transforms, sinks, and enrichment tables") + (dict "group" "api" "url" "/docs/reference/configuration/api/" "title" "API" "desc" "Configure Vector's observability API") + (dict "group" "schema" "url" "/docs/reference/configuration/schema/" "title" "Schema" "desc" "Configure Vector's internal schema system") + (dict "group" "secrets" "url" "/docs/reference/configuration/secrets/" "title" "Secrets" "desc" "Configure secrets management") +-}} +

For other top-level configuration options, see:

+
    +{{ range $configPages -}} +{{ if ne .group $currentGroup -}} +
  • {{ .title }} - {{ .desc }}
  • +{{ end -}} +{{ end -}} +
diff --git a/website/layouts/shortcodes/config/global.html b/website/layouts/shortcodes/config/global.html deleted file mode 100644 index 200c929a7c92f..0000000000000 --- a/website/layouts/shortcodes/config/global.html +++ /dev/null @@ -1,2 +0,0 @@ -{{ $global := site.Data.docs.configuration.configuration }} -{{ partial "data.html" (dict "component_config" $global) }} \ No newline at end of file diff --git a/website/layouts/shortcodes/config/group.html b/website/layouts/shortcodes/config/group.html new file mode 100644 index 0000000000000..ec6138e389252 --- /dev/null +++ b/website/layouts/shortcodes/config/group.html @@ -0,0 +1,14 @@ +{{- $config := site.Data.docs.configuration -}} +{{- $fields := $config.configuration -}} +{{- $groupName := .Get "group" -}} + +{{- /* Filter fields to only those in the specified group */ -}} +{{- $groupFields := dict -}} +{{- range $fieldKey, $fieldValue := $fields -}} + {{- if eq $fieldValue.group $groupName -}} + {{- $groupFields = merge $groupFields (dict $fieldKey $fieldValue) -}} + {{- end -}} +{{- end -}} + +{{- /* Render fields using the existing data.html partial */ -}} +{{ partial "data.html" (dict "component_config" $groupFields) }} diff --git a/website/layouts/shortcodes/config/unit-tests.html b/website/layouts/shortcodes/config/unit-tests.html deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/website/layouts/shortcodes/details.html b/website/layouts/shortcodes/details.html deleted file mode 100644 index 387373d687387..0000000000000 --- a/website/layouts/shortcodes/details.html +++ /dev/null @@ -1,14 +0,0 @@ -
- - -
- {{ .Inner | $.Page.RenderString }} -
-
\ No newline at end of file diff --git a/website/layouts/shortcodes/file-issue.html b/website/layouts/shortcodes/file-issue.html deleted file mode 100644 index cc6dc801213e8..0000000000000 --- a/website/layouts/shortcodes/file-issue.html +++ /dev/null @@ -1,10 +0,0 @@ -{{ $url := printf "https://github.com/%s/issues/new?assignees=&labels=type%3A+feature&template=feature.md&title=" site.Params.social.github_repo }} - - - - - - - {{ .Get "text" }} - - \ No newline at end of file diff --git a/website/yarn.lock b/website/yarn.lock index fb7d5ede88f74..3aa10cd2672e1 100644 --- a/website/yarn.lock +++ b/website/yarn.lock @@ -2904,9 +2904,9 @@ mini-svg-data-uri@^1.2.3: integrity sha512-gSfqpMRC8IxghvMcxzzmMnWpXAChSA+vy4cia33RgerMS8Fex95akUyQZPbxJJmeBGiGmK7n/1OpUX8ksRjIdA== minimatch@^3.1.1: - version "3.1.2" - resolved "https://registry.yarnpkg.com/minimatch/-/minimatch-3.1.2.tgz#19cd194bfd3e428f049a70817c038d89ab4be35b" - integrity sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw== + version "3.1.5" + resolved "https://registry.yarnpkg.com/minimatch/-/minimatch-3.1.5.tgz#580c88f8d5445f2bd6aa8f3cadefa0de79fbd69e" + integrity sha512-VgjWUsnnT6n+NUk6eZq77zeFdpW2LWDzP6zFGrCbHXiYNul5Dzqk2HHQ5uFH2DNW5Xbp8+jVzaeNt94ssEEl4w== dependencies: brace-expansion "^1.1.7"