From bb9b855806b5b53d3f2baf984c0bf41c6e6cbe3d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=B4mulo=20Farias?= Date: Tue, 7 Apr 2026 16:34:56 +0200 Subject: [PATCH 1/2] [Entity Store] Add more granular distributions --- src/commands/entity_store_perf/README.md | 24 ++- .../entity_store_perf/entity_store_perf.ts | 175 ++++++++++++++---- src/commands/entity_store_perf/index.ts | 104 +++++++++-- src/constants.ts | 2 +- src/types/entities.ts | 3 - src/utils/kibana_api.ts | 8 +- 6 files changed, 256 insertions(+), 60 deletions(-) diff --git a/src/commands/entity_store_perf/README.md b/src/commands/entity_store_perf/README.md index 49a875da..b94adb46 100644 --- a/src/commands/entity_store_perf/README.md +++ b/src/commands/entity_store_perf/README.md @@ -12,13 +12,27 @@ yarn start create-perf-data <name> <entity-count> <logs-per-entity> [start-index ### Options -- `--distribution <type>`: `standard` or `equal` (default: `standard`) +- `--distribution <type>` (default: `standard`): + - `equal`: 25% user, 25% host, 25% generic, 25% service (via ratios; service gets any remainder from rounding). + - `standard`: 33% user, 33% host, 33% generic, 1% service (same remainder behavior for service). + - `absolute`: exact counts per type; you **must** pass all four count flags below, and they **must sum** to `<entity-count>`. +- `--user-count <n>`: With `absolute` only — number of user entities. +- `--host-count <n>`: With `absolute` only — number of host entities. +- `--service-count <n>`: With `absolute` only — number of service entities. +- `--generic-count <n>`: With `absolute` only — number of generic entities. + +With `equal` or `standard`, do not pass the `--*-count` flags (the command will error). With `absolute`, all four counts are required. + +In generated perf data, the top-level `entity` field is emitted only for generic entities. 
### Examples ```bash yarn start create-perf-data large 100000 5 yarn start create-perf-data large 100000 5 0 --distribution equal +yarn start create-perf-data custom-mix 82000 5 0 \ + --distribution absolute \ + --user-count 60000 --host-count 2000 --service-count 17000 --generic-count 3000 ``` ## `upload-perf-data` @@ -28,20 +42,26 @@ Upload one Entity Store performance data file. ### Usage ```bash -yarn start upload-perf-data [file] [--index <index>] [--delete] +yarn start upload-perf-data [file] [--index <index>] [--delete] [options] ``` ### Options - `--index <index>`: Destination index override - `--delete`: Delete existing entities/data before upload +- `--metrics`: Generate metrics logs under `./logs` for baseline comparison (same format/prefix style as interval mode) +- `--samplingInterval <seconds>`: Metrics sampling interval when `--metrics` is enabled (default: `5`) +- `--transformTimeout <minutes>`: Generic transform wait timeout in metrics mode for V1 flow (default: `30`) - `--noTransforms`: Run Entity Store V2 / ESQL flow (enable V2, install V2, no transforms, v2 indices) +When `--metrics` is enabled, log files can be used with `create-baseline`/`compare-metrics` by passing the emitted prefix. In V2 mode (`--noTransforms`), transform stats are skipped. 
+ ### Example ```bash yarn start upload-perf-data large --delete yarn start upload-perf-data large --delete --noTransforms +yarn start upload-perf-data large --delete --metrics --samplingInterval 5 ``` ## `upload-perf-data-interval` diff --git a/src/commands/entity_store_perf/entity_store_perf.ts b/src/commands/entity_store_perf/entity_store_perf.ts index fc4cffe7..e9aca04f 100644 --- a/src/commands/entity_store_perf/entity_store_perf.ts +++ b/src/commands/entity_store_perf/entity_store_perf.ts @@ -143,13 +143,6 @@ const generateHostFields = ({ }: GeneratorOptions): HostFields => { const id = `${idPrefix}-host-${entityIndex}`; return { - entity: { - id: id, - name: id, - type: 'host', - sub_type: 'aws_ec2_instance', - address: `example.${idPrefix}.com`, - }, host: { id: id, name: id, @@ -185,8 +178,6 @@ const changeServiceName = (doc: Record, addition: string) => { const newName = `${doc.service.name}-${addition}`; doc.service.name = newName; doc.service.id = newName; - doc.entity.name = newName; - doc.entity.id = newName; return doc; }; @@ -208,13 +199,6 @@ const changeGenericEntityName = (doc: Record, addition: string) => const generateUserFields = ({ idPrefix, entityIndex }: GeneratorOptions): UserFields => { const id = `${idPrefix}-user-${entityIndex}`; return { - entity: { - id: id, - name: id, - type: 'user', - sub_type: 'aws_iam_user', - address: `example.${idPrefix}.com`, - }, user: { id: id, name: id, @@ -235,13 +219,6 @@ const generateUserFields = ({ idPrefix, entityIndex }: GeneratorOptions): UserFi const generateServiceFields = ({ idPrefix, entityIndex }: GeneratorOptions): ServiceFields => { const id = `${idPrefix}-service-${entityIndex}`; return { - entity: { - id: id, - name: id, - type: 'service', - sub_type: 'system', - address: `example.${idPrefix}.com`, - }, service: { id: id, name: id, @@ -355,15 +332,56 @@ export const ENTITY_DISTRIBUTIONS = { }, } as const; -export type DistributionType = keyof typeof ENTITY_DISTRIBUTIONS; +export type 
PresetDistributionType = keyof typeof ENTITY_DISTRIBUTIONS; +/** CLI / API: preset ratios (`equal`, `standard`) or `absolute` (explicit per-type counts). */ +export type DistributionType = PresetDistributionType | 'absolute'; export type EntityType = 'user' | 'host' | 'service' | 'generic'; -export const DEFAULT_DISTRIBUTION: DistributionType = 'standard'; +export const DEFAULT_DISTRIBUTION: PresetDistributionType = 'standard'; + +export const isValidDistributionType = (value: string): value is DistributionType => + value === 'absolute' || Object.prototype.hasOwnProperty.call(ENTITY_DISTRIBUTIONS, value); + +export type ExplicitEntityCountsInput = { + user: number; + host: number; + service: number; + generic: number; +}; + +/** + * Validates explicit per-type counts for `--distribution absolute`. + * Each count must be a non-negative integer; the sum must equal totalEntityCount. + */ +export const validateExplicitEntityCounts = ( + totalEntityCount: number, + counts: ExplicitEntityCountsInput, +) => { + const keys: (keyof ExplicitEntityCountsInput)[] = ['user', 'host', 'service', 'generic']; + for (const key of keys) { + const v = counts[key]; + if (!Number.isInteger(v) || v < 0) { + throw new Error(`Invalid ${key} count: expected a non-negative integer, got ${String(v)}`); + } + } + const { user, host, service, generic } = counts; + const sum = user + host + service + generic; + if (sum !== totalEntityCount) { + throw new Error(`Explicit entity counts sum to ${sum} but entity-count is ${totalEntityCount}`); + } + return { + user, + host, + generic, + service, + total: totalEntityCount, + }; +}; /** - * Get entity distribution by type + * Get entity distribution by type (preset only; not used for `absolute`) */ -export const getEntityDistribution = (type: DistributionType = DEFAULT_DISTRIBUTION) => { +export const getEntityDistribution = (type: PresetDistributionType = DEFAULT_DISTRIBUTION) => { return ENTITY_DISTRIBUTIONS[type]; }; @@ -819,26 +837,53 @@ export 
const createPerfDataFile = async ({ startIndex, name, distribution = DEFAULT_DISTRIBUTION, + explicitEntityCounts, }: { name: string; entityCount: number; logsPerEntity: number; startIndex: number; distribution?: DistributionType; + explicitEntityCounts?: ExplicitEntityCountsInput; }): Promise => { const filePath = getFilePath(name); - const dist = getEntityDistribution(distribution); - const entityCounts = calculateEntityCounts(entityCount, dist); + + let entityCounts: ReturnType; + + if (distribution === 'absolute') { + if (explicitEntityCounts === undefined) { + throw new Error('explicitEntityCounts is required when distribution is absolute'); + } + entityCounts = validateExplicitEntityCounts(entityCount, explicitEntityCounts); + } else { + if (explicitEntityCounts !== undefined) { + throw new Error('explicitEntityCounts must not be set unless distribution is absolute'); + } + const dist = getEntityDistribution(distribution); + entityCounts = calculateEntityCounts(entityCount, dist); + } log.info( `Creating performance data file ${name} with ${entityCount} entities and ${logsPerEntity} logs per entity. 
Starting at index ${startIndex}`, ); - log.info( - `Distribution (${distribution}): ${entityCounts.user} users (${(dist.user * 100).toFixed(1)}%), ` + - `${entityCounts.host} hosts (${(dist.host * 100).toFixed(1)}%), ` + - `${entityCounts.service} services (${(dist.service * 100).toFixed(1)}%), ` + - `${entityCounts.generic} generic entities (${(dist.generic * 100).toFixed(1)}%)`, - ); + + if (distribution === 'absolute') { + const pct = (n: number) => ((n / entityCount) * 100).toFixed(1); + log.info( + `Distribution (absolute): ${entityCounts.user} users (${pct(entityCounts.user)}%), ` + + `${entityCounts.host} hosts (${pct(entityCounts.host)}%), ` + + `${entityCounts.service} services (${pct(entityCounts.service)}%), ` + + `${entityCounts.generic} generic entities (${pct(entityCounts.generic)}%)`, + ); + } else { + const dist = getEntityDistribution(distribution); + log.info( + `Distribution (${distribution}): ${entityCounts.user} users (${(dist.user * 100).toFixed(1)}%), ` + + `${entityCounts.host} hosts (${(dist.host * 100).toFixed(1)}%), ` + + `${entityCounts.service} services (${(dist.service * 100).toFixed(1)}%), ` + + `${entityCounts.generic} generic entities (${(dist.generic * 100).toFixed(1)}%)`, + ); + } if (fs.existsSync(filePath)) { log.info(`Data file ${name}.json already exists. Deleting...`); @@ -1041,6 +1086,11 @@ export const uploadPerfDataFile = async ( indexOverride?: string, deleteEntities?: boolean, noTransforms?: boolean, + metricsOptions?: { + enabled: boolean; + samplingIntervalMs: number; + transformTimeoutMs: number; + }, ) => { const index = indexOverride || `logs-perftest.${name}-default`; const entityIndex = noTransforms ? ENTITY_INDEX_V2 : ENTITY_INDEX_V1; @@ -1083,15 +1133,58 @@ export const uploadPerfDataFile = async ( `Data file ${name} has ${lineCount} lines, ${entityCount} entities and ${logsPerEntity} logs per entity`, ); const startTime = Date.now(); + const samplingInterval = metricsOptions?.samplingIntervalMs ?? 
5000; + const transformTimeout = metricsOptions?.transformTimeoutMs ?? 1800000; + const metricsEnabled = metricsOptions?.enabled ?? false; + + let stopHealthLogging = () => {}; + let stopTransformsLogging = () => {}; + let stopNodeStatsLogging = () => {}; + let stopKibanaStatsLogging = () => {}; + + if (metricsEnabled) { + stopHealthLogging = logClusterHealthEvery(name, samplingInterval); + stopNodeStatsLogging = logNodeStatsEvery(name, samplingInterval); + stopKibanaStatsLogging = logKibanaStatsEvery(name, samplingInterval); + if (!noTransforms) { + stopTransformsLogging = logTransformStatsEvery(name, samplingInterval); + } + } - await uploadFile({ filePath, index, lineCount }); - const ingestTook = Date.now() - startTime; - log.info(`Data file ${name} uploaded to index ${index} in ${ingestTook}ms`); + try { + await uploadFile({ filePath, index, lineCount }); + const ingestTook = Date.now() - startTime; + log.info(`Data file ${name} uploaded to index ${index} in ${ingestTook}ms`); - await countEntitiesUntil(name, entityCount, entityIndex); + await countEntitiesUntil(name, entityCount, entityIndex); - const tookTotal = Date.now() - startTime; - log.info(`Total time: ${tookTotal}ms`); + if (metricsEnabled && !noTransforms) { + log.info( + `Waiting for generic transform to process ${lineCount} documents (timeout: ${transformTimeout / 1000 / 60} minutes)...`, + ); + try { + await waitForTransformToComplete( + 'entities-v1-latest-security_generic_default', + lineCount, + transformTimeout, + ); + } catch (error) { + log.warn( + `Warning: ${error instanceof Error ? error.message : 'Failed to wait for transform completion'}. 
Continuing...`, + ); + } + } + + const tookTotal = Date.now() - startTime; + log.info(`Total time: ${tookTotal}ms`); + } finally { + if (metricsEnabled) { + stopHealthLogging(); + stopTransformsLogging(); + stopNodeStatsLogging(); + stopKibanaStatsLogging(); + } + } }; /** diff --git a/src/commands/entity_store_perf/index.ts b/src/commands/entity_store_perf/index.ts index e84fe149..41642e30 100644 --- a/src/commands/entity_store_perf/index.ts +++ b/src/commands/entity_store_perf/index.ts @@ -7,7 +7,7 @@ import { listPerfDataFiles, uploadPerfDataFile, uploadPerfDataFileInterval, - ENTITY_DISTRIBUTIONS, + isValidDistributionType, type DistributionType, } from './entity_store_perf.ts'; @@ -21,25 +21,89 @@ export const entityStorePerfCommands: CommandModule = { .argument('[start-index]', 'for sequential data, which index to start at', parseIntBase10, 0) .option( '--distribution ', - `Entity distribution type: equal (user/host/generic/service: 25% each), standard (user/host/generic/service: 33/33/33/1) (default: standard)`, + 'Entity distribution: equal (25% each type), standard (33/33/33/1%), absolute (requires --user-count, --host-count, --service-count, --generic-count; must sum to entity-count) (default: standard)', 'standard', ) + .option( + '--user-count ', + 'With --distribution absolute: number of user entities', + parseIntBase10, + ) + .option( + '--host-count ', + 'With --distribution absolute: number of host entities', + parseIntBase10, + ) + .option( + '--service-count ', + 'With --distribution absolute: number of service entities', + parseIntBase10, + ) + .option( + '--generic-count ', + 'With --distribution absolute: number of generic entities', + parseIntBase10, + ) .description('Create performance data') .action( wrapAction(async (name, entityCount, logsPerEntity, startIndex, options) => { const distributionType = options.distribution as DistributionType; - if (!ENTITY_DISTRIBUTIONS[distributionType]) { + if (!isValidDistributionType(distributionType)) { 
log.error(`❌ Invalid distribution type: ${distributionType}`); - log.error(` Available types: ${Object.keys(ENTITY_DISTRIBUTIONS).join(', ')}`); + log.error(` Available types: equal, standard, absolute`); process.exit(1); } - await createPerfDataFile({ - name, - entityCount, - logsPerEntity, - startIndex, - distribution: distributionType, - }); + + const userCount = options.userCount as number | undefined; + const hostCount = options.hostCount as number | undefined; + const serviceCount = options.serviceCount as number | undefined; + const genericCount = options.genericCount as number | undefined; + const anyCountOptionSet = + userCount !== undefined || + hostCount !== undefined || + serviceCount !== undefined || + genericCount !== undefined; + + if (distributionType === 'absolute') { + if ( + userCount === undefined || + hostCount === undefined || + serviceCount === undefined || + genericCount === undefined + ) { + log.error( + '❌ --distribution absolute requires --user-count, --host-count, --service-count, and --generic-count', + ); + process.exit(1); + } + await createPerfDataFile({ + name, + entityCount, + logsPerEntity, + startIndex, + distribution: 'absolute', + explicitEntityCounts: { + user: userCount, + host: hostCount, + service: serviceCount, + generic: genericCount, + }, + }); + } else { + if (anyCountOptionSet) { + log.error( + '❌ --user-count, --host-count, --service-count, and --generic-count are only valid with --distribution absolute', + ); + process.exit(1); + } + await createPerfDataFile({ + name, + entityCount, + logsPerEntity, + startIndex, + distribution: distributionType, + }); + } }), ); @@ -48,6 +112,19 @@ export const entityStorePerfCommands: CommandModule = { .argument('[file]', 'File to upload') .option('--index ', 'Destination index') .option('--delete', 'Delete all entities before uploading') + .option('--metrics', 'Generate metrics logs for baseline comparison') + .option( + '--samplingInterval ', + 'Sampling interval in seconds for 
metrics collection (default: 5)', + parseIntBase10, + 5, + ) + .option( + '--transformTimeout ', + 'Timeout in minutes for waiting for generic transform to complete in metrics mode (default: 30)', + parseIntBase10, + 30, + ) .option('--noTransforms', 'Use Entity Store V2 / ESQL flow (no transforms)') .description('Upload performance data file') .action( @@ -57,6 +134,11 @@ export const entityStorePerfCommands: CommandModule = { options.index, options.delete, options.noTransforms, + { + enabled: options.metrics, + samplingIntervalMs: options.samplingInterval * 1000, + transformTimeoutMs: options.transformTimeout * 60 * 1000, + }, ); }), ); diff --git a/src/constants.ts b/src/constants.ts index 9d32ecc6..0b395046 100644 --- a/src/constants.ts +++ b/src/constants.ts @@ -107,7 +107,7 @@ export const KIBANA_SETTINGS_URL = '/api/kibana/settings'; export const KIBANA_SETTINGS_INTERNAL_URL = '/internal/kibana/settings'; // Entity Store V2 (ESQL) internal API -export const ENTITY_STORE_V2_INSTALL_URL = '/internal/security/entity_store/install'; +export const ENTITY_STORE_V2_INSTALL_URL = '/api/security/entity_store/install'; // ML module group used by Security export const ML_GROUP_ID = 'security'; diff --git a/src/types/entities.ts b/src/types/entities.ts index e4b1dcfa..baeba33c 100644 --- a/src/types/entities.ts +++ b/src/types/entities.ts @@ -11,7 +11,6 @@ export interface EntityFields { } export interface HostFields { - entity: EntityFields; host: { hostname?: string; domain?: string; @@ -25,7 +24,6 @@ export interface HostFields { } export interface UserFields { - entity: EntityFields; user: { full_name?: string[]; domain?: string; @@ -45,7 +43,6 @@ export interface UserFields { } export interface ServiceFields { - entity: EntityFields; service: { name: string; id?: string; diff --git a/src/utils/kibana_api.ts b/src/utils/kibana_api.ts index 3404a222..109ce1ee 100644 --- a/src/utils/kibana_api.ts +++ b/src/utils/kibana_api.ts @@ -670,9 +670,13 @@ export const 
enableEntityStoreV2 = async (space: string = 'default'): Promise => { const spacePath = getEntityStoreV2SpacePath(space); - const installPath = `${spacePath}${ENTITY_STORE_V2_INSTALL_URL}?apiVersion=2`; + const installPath = `${spacePath}${ENTITY_STORE_V2_INSTALL_URL}`; - await kibanaFetch(installPath, { method: 'POST', body: JSON.stringify({}) }, { apiVersion: '2' }); + await kibanaFetch( + installPath, + { method: 'POST', body: JSON.stringify({}) }, + { apiVersion: API_VERSIONS.public.v1 }, + ); log.info('Entity Store V2 installed successfully'); }; From 2f4e0afea0f3f9a6fcedee846964020c43b41157 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=B4mulo=20Farias?= Date: Fri, 10 Apr 2026 10:38:02 +0200 Subject: [PATCH 2/2] Fix div by 0 --- src/commands/entity_store_perf/entity_store_perf.ts | 3 ++- src/commands/entity_store_perf/index.ts | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/commands/entity_store_perf/entity_store_perf.ts b/src/commands/entity_store_perf/entity_store_perf.ts index a409bc7a..4a37d028 100644 --- a/src/commands/entity_store_perf/entity_store_perf.ts +++ b/src/commands/entity_store_perf/entity_store_perf.ts @@ -868,7 +868,8 @@ export const createPerfDataFile = async ({ ); if (distribution === 'absolute') { - const pct = (n: number) => ((n / entityCount) * 100).toFixed(1); + // if there are no entities, we will show 100% for each type + const pct = (n: number) => (entityCount > 0 ? 
((n / entityCount) * 100).toFixed(1) : '100'); log.info( `Distribution (absolute): ${entityCounts.user} users (${pct(entityCounts.user)}%), ` + `${entityCounts.host} hosts (${pct(entityCounts.host)}%), ` + diff --git a/src/commands/entity_store_perf/index.ts b/src/commands/entity_store_perf/index.ts index 41642e30..c9c56a33 100644 --- a/src/commands/entity_store_perf/index.ts +++ b/src/commands/entity_store_perf/index.ts @@ -9,6 +9,7 @@ import { uploadPerfDataFileInterval, isValidDistributionType, type DistributionType, + ENTITY_DISTRIBUTIONS, } from './entity_store_perf.ts'; export const entityStorePerfCommands: CommandModule = { @@ -50,7 +51,7 @@ export const entityStorePerfCommands: CommandModule = { const distributionType = options.distribution as DistributionType; if (!isValidDistributionType(distributionType)) { log.error(`❌ Invalid distribution type: ${distributionType}`); - log.error(` Available types: equal, standard, absolute`); + log.error(` Available types: ${Object.keys(ENTITY_DISTRIBUTIONS).join(', ')}`); process.exit(1); }