Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 22 additions & 2 deletions src/commands/entity_store_perf/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,27 @@ yarn start create-perf-data <name> <entity-count> <logs-per-entity> [start-index

### Options

- `--distribution <type>`: `standard` or `equal` (default: `standard`)
- `--distribution <type>` (default: `standard`):
- `equal`: 25% user, 25% host, 25% generic, 25% service (via ratios; service gets any remainder from rounding).
- `standard`: 33% user, 33% host, 33% generic, 1% service (same remainder behavior for service).
- `absolute`: exact counts per type; you **must** pass all four count flags below, and they **must sum** to `<entity-count>`.
- `--user-count <n>`: With `absolute` only — number of user entities.
- `--host-count <n>`: With `absolute` only — number of host entities.
- `--service-count <n>`: With `absolute` only — number of service entities.
- `--generic-count <n>`: With `absolute` only — number of generic entities.

With `equal` or `standard`, do not pass the `--*-count` flags (the command will error). With `absolute`, all four counts are required.

In generated perf data, the top-level `entity` field is emitted only for generic entities.

### Examples

```bash
yarn start create-perf-data large 100000 5
yarn start create-perf-data large 100000 5 0 --distribution equal
yarn start create-perf-data custom-mix 82000 5 0 \
--distribution absolute \
--user-count 60000 --host-count 2000 --service-count 17000 --generic-count 3000
```

## `upload-perf-data`
Expand All @@ -28,20 +42,26 @@ Upload one Entity Store performance data file.
### Usage

```bash
yarn start upload-perf-data [file] [--index <index>] [--delete]
yarn start upload-perf-data [file] [--index <index>] [--delete] [options]
```

### Options

- `--index <index>`: Destination index override
- `--delete`: Delete existing entities/data before upload
- `--metrics`: Generate metrics logs under `./logs` for baseline comparison (same format/prefix style as interval mode)
- `--samplingInterval <seconds>`: Metrics sampling interval when `--metrics` is enabled (default: `5`)
- `--transformTimeout <minutes>`: Generic transform wait timeout in metrics mode for V1 flow (default: `30`)
- `--noTransforms`: Run Entity Store V2 / ESQL flow (enable V2, install V2, no transforms, v2 indices)

When `--metrics` is enabled, log files can be used with `create-baseline`/`compare-metrics` by passing the emitted prefix. In V2 mode (`--noTransforms`), transform stats are skipped.

### Example

```bash
yarn start upload-perf-data large --delete
yarn start upload-perf-data large --delete --noTransforms
yarn start upload-perf-data large --delete --metrics --samplingInterval 5
```

## `upload-perf-data-interval`
Expand Down
176 changes: 135 additions & 41 deletions src/commands/entity_store_perf/entity_store_perf.ts
Original file line number Diff line number Diff line change
Expand Up @@ -141,13 +141,6 @@ const generateHostFields = ({
}: GeneratorOptions): HostFields => {
const id = `${idPrefix}-host-${entityIndex}`;
return {
entity: {
id: id,
name: id,
type: 'host',
sub_type: 'aws_ec2_instance',
address: `example.${idPrefix}.com`,
},
host: {
id: id,
name: id,
Expand Down Expand Up @@ -183,8 +176,6 @@ const changeServiceName = (doc: Record<string, any>, addition: string) => {
const newName = `${doc.service.name}-${addition}`;
doc.service.name = newName;
doc.service.id = newName;
doc.entity.name = newName;
doc.entity.id = newName;
return doc;
};

Expand All @@ -206,13 +197,6 @@ const changeGenericEntityName = (doc: Record<string, any>, addition: string) =>
const generateUserFields = ({ idPrefix, entityIndex }: GeneratorOptions): UserFields => {
const id = `${idPrefix}-user-${entityIndex}`;
return {
entity: {
id: id,
name: id,
type: 'user',
sub_type: 'aws_iam_user',
address: `example.${idPrefix}.com`,
},
user: {
id: id,
name: id,
Expand All @@ -233,13 +217,6 @@ const generateUserFields = ({ idPrefix, entityIndex }: GeneratorOptions): UserFi
const generateServiceFields = ({ idPrefix, entityIndex }: GeneratorOptions): ServiceFields => {
const id = `${idPrefix}-service-${entityIndex}`;
return {
entity: {
id: id,
name: id,
type: 'service',
sub_type: 'system',
address: `example.${idPrefix}.com`,
},
service: {
id: id,
name: id,
Expand Down Expand Up @@ -353,15 +330,56 @@ export const ENTITY_DISTRIBUTIONS = {
},
} as const;

export type DistributionType = keyof typeof ENTITY_DISTRIBUTIONS;
/** Names of the preset ratio tables declared in ENTITY_DISTRIBUTIONS (`equal`, `standard`). */
export type PresetDistributionType = keyof typeof ENTITY_DISTRIBUTIONS;
/** CLI / API: preset ratios (`equal`, `standard`) or `absolute` (explicit per-type counts). */
export type DistributionType = PresetDistributionType | 'absolute';
/** The four entity categories that perf data is generated for. */
export type EntityType = 'user' | 'host' | 'service' | 'generic';

export const DEFAULT_DISTRIBUTION: DistributionType = 'standard';
export const DEFAULT_DISTRIBUTION: PresetDistributionType = 'standard';

/**
 * Type guard: true when `value` names a supported distribution — either the
 * special `absolute` mode or one of the preset keys of ENTITY_DISTRIBUTIONS.
 */
export const isValidDistributionType = (value: string): value is DistributionType => {
  if (value === 'absolute') {
    return true;
  }
  // Own-property check (not `in`) so inherited Object.prototype keys never match.
  return Object.prototype.hasOwnProperty.call(ENTITY_DISTRIBUTIONS, value);
};

/**
 * Per-type entity counts supplied by the caller for `--distribution absolute`.
 * Each field is expected to be a non-negative integer; validation happens in
 * validateExplicitEntityCounts.
 */
export type ExplicitEntityCountsInput = {
  user: number;
  host: number;
  service: number;
  generic: number;
};

/**
 * Validates the per-type entity counts supplied with `--distribution absolute`.
 *
 * Every count must be a whole, non-negative number and the four counts must
 * add up to `totalEntityCount`; the first violation found is thrown as an
 * Error.
 *
 * @param totalEntityCount - the `<entity-count>` CLI argument the counts must sum to.
 * @param counts - explicit counts for user, host, service and generic entities.
 * @returns the validated per-type counts together with the total.
 * @throws Error when a count is negative/non-integer or the sum mismatches.
 */
export const validateExplicitEntityCounts = (
  totalEntityCount: number,
  counts: ExplicitEntityCountsInput,
) => {
  const entityTypes = ['user', 'host', 'service', 'generic'] as const;

  // Number.isInteger also rejects NaN and non-finite values.
  for (const entityType of entityTypes) {
    const count = counts[entityType];
    if (!Number.isInteger(count) || count < 0) {
      throw new Error(
        `Invalid ${entityType} count: expected a non-negative integer, got ${String(count)}`,
      );
    }
  }

  const sum = entityTypes.reduce((total, entityType) => total + counts[entityType], 0);
  if (sum !== totalEntityCount) {
    throw new Error(`Explicit entity counts sum to ${sum} but entity-count is ${totalEntityCount}`);
  }

  return {
    user: counts.user,
    host: counts.host,
    generic: counts.generic,
    service: counts.service,
    total: totalEntityCount,
  };
};

/**
* Get entity distribution by type
* Get entity distribution by type (preset only; not used for `absolute`)
*/
export const getEntityDistribution = (type: DistributionType = DEFAULT_DISTRIBUTION) => {
/**
 * Look up the ratio table for a preset distribution type.
 * Not used for `absolute`, which carries explicit per-type counts instead.
 */
export const getEntityDistribution = (type: PresetDistributionType = DEFAULT_DISTRIBUTION) =>
  ENTITY_DISTRIBUTIONS[type];

Expand Down Expand Up @@ -819,26 +837,54 @@ export const createPerfDataFile = async ({
startIndex,
name,
distribution = DEFAULT_DISTRIBUTION,
explicitEntityCounts,
}: {
name: string;
entityCount: number;
logsPerEntity: number;
startIndex: number;
distribution?: DistributionType;
explicitEntityCounts?: ExplicitEntityCountsInput;
}): Promise<void> => {
const filePath = getFilePath(name);
const dist = getEntityDistribution(distribution);
const entityCounts = calculateEntityCounts(entityCount, dist);

let entityCounts: ReturnType<typeof calculateEntityCounts>;

if (distribution === 'absolute') {
if (explicitEntityCounts === undefined) {
throw new Error('explicitEntityCounts is required when distribution is absolute');
}
entityCounts = validateExplicitEntityCounts(entityCount, explicitEntityCounts);
} else {
if (explicitEntityCounts !== undefined) {
throw new Error('explicitEntityCounts must not be set unless distribution is absolute');
}
const dist = getEntityDistribution(distribution);
entityCounts = calculateEntityCounts(entityCount, dist);
}

log.info(
`Creating performance data file ${name} with ${entityCount} entities and ${logsPerEntity} logs per entity. Starting at index ${startIndex}`,
);
log.info(
`Distribution (${distribution}): ${entityCounts.user} users (${(dist.user * 100).toFixed(1)}%), ` +
`${entityCounts.host} hosts (${(dist.host * 100).toFixed(1)}%), ` +
`${entityCounts.service} services (${(dist.service * 100).toFixed(1)}%), ` +
`${entityCounts.generic} generic entities (${(dist.generic * 100).toFixed(1)}%)`,
);

if (distribution === 'absolute') {
// if there are no entities, we will show 100% for each type
const pct = (n: number) => (entityCount > 0 ? ((n / entityCount) * 100).toFixed(1) : '100');
log.info(
`Distribution (absolute): ${entityCounts.user} users (${pct(entityCounts.user)}%), ` +
`${entityCounts.host} hosts (${pct(entityCounts.host)}%), ` +
`${entityCounts.service} services (${pct(entityCounts.service)}%), ` +
`${entityCounts.generic} generic entities (${pct(entityCounts.generic)}%)`,
);
} else {
const dist = getEntityDistribution(distribution);
log.info(
`Distribution (${distribution}): ${entityCounts.user} users (${(dist.user * 100).toFixed(1)}%), ` +
`${entityCounts.host} hosts (${(dist.host * 100).toFixed(1)}%), ` +
`${entityCounts.service} services (${(dist.service * 100).toFixed(1)}%), ` +
`${entityCounts.generic} generic entities (${(dist.generic * 100).toFixed(1)}%)`,
);
}

if (fs.existsSync(filePath)) {
log.info(`Data file ${name}.json already exists. Deleting...`);
Expand Down Expand Up @@ -1041,6 +1087,11 @@ export const uploadPerfDataFile = async (
indexOverride?: string,
deleteEntities?: boolean,
noTransforms?: boolean,
metricsOptions?: {
enabled: boolean;
samplingIntervalMs: number;
transformTimeoutMs: number;
},
) => {
const index = indexOverride || `logs-perftest.${name}-default`;
const entityIndex = noTransforms ? ENTITY_INDEX_V2 : ENTITY_INDEX_V1;
Expand Down Expand Up @@ -1083,15 +1134,58 @@ export const uploadPerfDataFile = async (
`Data file ${name} has ${lineCount} lines, ${entityCount} entities and ${logsPerEntity} logs per entity`,
);
const startTime = Date.now();
const samplingInterval = metricsOptions?.samplingIntervalMs ?? 5000;
const transformTimeout = metricsOptions?.transformTimeoutMs ?? 1800000;
const metricsEnabled = metricsOptions?.enabled ?? false;

let stopHealthLogging = () => {};
let stopTransformsLogging = () => {};
let stopNodeStatsLogging = () => {};
let stopKibanaStatsLogging = () => {};

if (metricsEnabled) {
stopHealthLogging = logClusterHealthEvery(name, samplingInterval);
stopNodeStatsLogging = logNodeStatsEvery(name, samplingInterval);
stopKibanaStatsLogging = logKibanaStatsEvery(name, samplingInterval);
if (!noTransforms) {
stopTransformsLogging = logTransformStatsEvery(name, samplingInterval);
}
}

await uploadFile({ filePath, index, lineCount });
const ingestTook = Date.now() - startTime;
log.info(`Data file ${name} uploaded to index ${index} in ${ingestTook}ms`);
try {
await uploadFile({ filePath, index, lineCount });
const ingestTook = Date.now() - startTime;
log.info(`Data file ${name} uploaded to index ${index} in ${ingestTook}ms`);

await countEntitiesUntil(name, entityCount, entityIndex);
await countEntitiesUntil(name, entityCount, entityIndex);

const tookTotal = Date.now() - startTime;
log.info(`Total time: ${tookTotal}ms`);
if (metricsEnabled && !noTransforms) {
log.info(
`Waiting for generic transform to process ${lineCount} documents (timeout: ${transformTimeout / 1000 / 60} minutes)...`,
);
try {
await waitForTransformToComplete(
'entities-v1-latest-security_generic_default',
lineCount,
transformTimeout,
);
} catch (error) {
log.warn(
`Warning: ${error instanceof Error ? error.message : 'Failed to wait for transform completion'}. Continuing...`,
);
}
}

const tookTotal = Date.now() - startTime;
log.info(`Total time: ${tookTotal}ms`);
} finally {
if (metricsEnabled) {
stopHealthLogging();
stopTransformsLogging();
stopNodeStatsLogging();
stopKibanaStatsLogging();
}
}
};

/**
Expand Down
Loading