Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 21 additions & 2 deletions .github/workflows/service-registration.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,27 @@ jobs:
run: >
./gradlew runStartUpCheck --info --scan -Denvironment.startServices=true

- name: Start OpenTelemetry containers
run: |
cd otel
sh/start_containers.sh

- name: Run startup check for modulith
run: >
./gradlew runStartUpCheck --info --scan -Denvironment.startServices=true -Denvironment.modulith=true
run: |
export OTEL_SDK_DISABLED=false
export OTEL_RESOURCE_ATTRIBUTES_DEPLOYMENT_ENVIRONMENT=dev
export OTEL_RESOURCE_ATTRIBUTES_SERVICE_NAME=apiml
export OTEL_RESOURCE_ATTRIBUTES_ZOS_SMF_ID=SYS1
export OTEL_RESOURCE_ATTRIBUTES_ZOS_SYSPLEX_NAME=SYSPLEX1
export OTEL_RESOURCE_ATTRIBUTES_MAINFRAME_LPAR_NAME=LPAR01
export OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318
./gradlew runStartUpCheckWithOpenTelemetry --info --scan -Denvironment.startServices=true -Denvironment.modulith=true

- name: Validate telemetry data and stop containers
if: always()
run: |
cd otel
sh/validate_and_stop.sh

- name: Store results
uses: actions/upload-artifact@v4
Expand All @@ -45,5 +63,6 @@ jobs:
name: BuildAndTest-${{ env.JOB_ID }}
path: |
*/build/reports/**
otel/**

- uses: ./.github/actions/teardown
12 changes: 12 additions & 0 deletions integration-tests/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,18 @@ task runStartUpCheck(type: Test) {
group 'integration tests'
description "Check that the API Mediation Layer is up and running"

systemProperties System.properties
useJUnitPlatform {
includeTags 'StartupCheck'
excludeTags 'OpenTelemetryTest'
}
outputs.upToDateWhen { false }
}

task runStartUpCheckWithOpenTelemetry(type: Test) {
group 'integration tests'
description "Check that the API Mediation Layer is up and running with graceful wait for OpenTelemetry"

systemProperties System.properties
useJUnitPlatform {
includeTags 'StartupCheck'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,15 @@

package org.zowe.apiml.startup;

import lombok.SneakyThrows;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.zowe.apiml.startup.impl.ApiMediationLayerStartupChecker;
import org.zowe.apiml.util.categories.OpenTelemetryTest;
import org.zowe.apiml.util.categories.StartupCheck;

import java.time.Duration;

import static org.junit.jupiter.api.Assertions.assertTrue;

@StartupCheck
Expand All @@ -29,4 +33,15 @@ void setUp() {
void checkApiMediationLayerStart() {
assertTrue(true);
}

@Test
@SneakyThrows
@OpenTelemetryTest
void giveOpenTelemetryTimeToSendMetrics() {
    // The application must keep running for a while so that telemetry data is
    // collected and sent; only then can the OpenTelemetry Golden Tester evaluate it.
    final Duration exportGracePeriod = Duration.ofSeconds(30);
    final long gracePeriodMillis = exportGracePeriod.toMillis();
    Thread.sleep(gracePeriodMillis);

    // No functional check is made here: the test passes once the wait has elapsed.
    assertTrue(true);
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
/*
* This program and the accompanying materials are made available under the terms of the
* Eclipse Public License v2.0 which accompanies this distribution, and is available at
* https://www.eclipse.org/legal/epl-v20.html
*
* SPDX-License-Identifier: EPL-2.0
*
* Copyright Contributors to the Zowe Project.
*/

package org.zowe.apiml.util.categories;

import org.junit.jupiter.api.Tag;

import java.lang.annotation.Retention;
import java.lang.annotation.RetentionPolicy;
import java.lang.annotation.Target;

import static java.lang.annotation.ElementType.METHOD;
import static java.lang.annotation.ElementType.TYPE;

/**
 * Marker annotation that applies the JUnit tag {@code OpenTelemetryTest} to a test class
 * or a test method.
 *
 * <p>It may be placed on types and on methods and is retained at runtime. The tag name can
 * be used in tag-based test selection, for example Gradle's {@code includeTags} /
 * {@code excludeTags} filters.
 */
@Tag("OpenTelemetryTest")
@Target({ TYPE, METHOD })
@Retention(RetentionPolicy.RUNTIME)
public @interface OpenTelemetryTest {
}

Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,14 @@
import java.util.Map;
import java.util.Optional;

import static org.zowe.apiml.util.config.ConfigReader.IS_MODULITH_ENABLED;

//TODO this class doesn't lend itself well to switching of configurations.
//attls is integrated in a kludgy way, and deserves a rewrite

@Slf4j
public class FullApiMediationLayer {

public static final boolean IS_MODULITH_ENABLED = Boolean.parseBoolean(System.getProperty("environment.modulith"));

private RunningService discoveryService;
private RunningService gatewayService;
private RunningService apiCatalogService;
Expand Down
73 changes: 73 additions & 0 deletions otel/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
# OpenTelemetry containers for integration testing

The [docker-compose.yml](docker-compose.yml) defines 2 containers:
- OpenTelemetry Collector (collector)
- OpenTelemetry Golden Tester (golden)

The collector is the standard OpenTelemetry Collector ([docs](https://opentelemetry.io/docs/collector/), [repo](https://github.com/open-telemetry/opentelemetry-collector-contrib)). The Golden Tester comes from the [same](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/cmd/golden) repository and validates data exported from the collector. Only metrics (in [alpha](https://github.com/open-telemetry/opentelemetry-collector/blob/main/docs/component-stability.md#alpha) stability level) are supported as of January 2026.

## Integration test flow

The API Mediation Layer produces telemetry data that is exported to the Collector. The Collector then exports the data to the Golden Tester, the same way the data would be published to an observability stack in a real deployment. The Golden Tester validates the telemetry data against a definition from a YAML file. If the validation does not pass within a timeout, the container exits with exit code 1.

```mermaid
flowchart LR
apiml["APIML (modulith)"]
collector["OpenTelemetry Collector"]
collector-config{{config.yaml}}
golden["OpenTelemetry Golden Tester"]
golden-config{{expected.yaml}}
apiml -- sends telemetry data --> collector
subgraph docker
collector -- forwards telemetry data --> golden
collector-config -.-> collector
golden -. validates against .-> golden-config
end

```

The Golden Tester validates all metrics received, which makes the definition of expected data difficult because the definition needs to be exhaustive. For this reason, the OpenTelemetry Collector is configured to produce at most one metric for validation; see the collector configuration file [otel-collector/config.yaml](otel-collector/config.yaml), which is mounted into the collector container.

The Golden Tester configuration is split into 2 parts:
- Configuration of the tester like timeout, ports, fields to ignore, etc. is done via cli arguments. CLI arguments for the golden binary are placed in the [docker-compose.yml](docker-compose.yml) file. The list of supported options can be found in the [golden binary sources](https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/main/cmd/golden/internal/config.go).
- The definition of expected observability data is in [otel-golden/expected.yaml](otel-golden/expected.yaml).

### Golden Tester configuration consideration
Ideally, we would have a generic docker-compose file with the configuration injected via mounted configuration files or environment variables. Unfortunately, the golden binary accepts only CLI arguments (except for the definition of expected data).

Every CLI argument that requires a value is processed as 2 distinct arguments by the golden binary. Because the [official golden docker image](https://github.com/open-telemetry/opentelemetry-collector-contrib/pkgs/container/opentelemetry-collector-contrib%2Fgolden) is built from the [scratch base](https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/main/cmd/golden/Dockerfile), there is no shell inside the golden image to preprocess the CLI arguments, so the arguments are passed to the binary exactly as defined in the [docker-compose.yml](docker-compose.yml) file.

For instance, if your docker compose file contains:
```
command:
- "--ignore-resource-attribute-value process.pid"
```
The whole string is passed to the binary and thus never matches the argument in the binary resulting in the value being ignored. The argument and value must be passed as two arguments:
```
command: [
"--ignore-resource-attribute-value", "process.pid"
]
```

When environment variables are used to pass values to the docker compose file, only simple values that fit into a single argument can be used. Unfortunately, this does not work for `--ignore-resource-attribute-value`, because the option must be repeated for every value to be ignored.

Possible workarounds are:
- Use a Docker multi-stage build to create a custom Golden Tester image with a shell. The shell splits the argument string on whitespace and passes the parts as individual arguments to the binary. Multiple arguments can then be defined in an environment variable:
```
GOLDEN_IGNORE_FIELDS = "--ignore-resource-attribute-value service.instance.id --ignore-resource-attribute-value host.name --ignore-resource-attribute-value host.arch --ignore-resource-attribute-value process.pid"
```
and the variable used as a placeholder in the docker compose `command`.

- Add the arguments to the `docker compose run` command:
```shell
$ docker compose run --rm --service-ports golden --ignore-resource-attribute-value service.instance.id --ignore-resource-attribute-value host.name --ignore-resource-attribute-value host.arch --ignore-resource-attribute-value process.pid
```
Note that `docker compose` cli arguments override the `command` value in the docker file, and the containers must be started individually in comparison to the simple `docker compose up`.

## Local run for development
To run the docker containers locally with the same setup as used in the integration tests, just run `docker compose up` (optionally with `-d`), and then start the APIML modulith with OpenTelemetry enabled. The signals received and exported by the collector are saved to the [otel-collector](otel-collector) folder. The Golden Tester exits after the timeout, reporting the result of the validation in the container console/log. The timeout can be set in the [docker-compose.yml](docker-compose.yml) file.





59 changes: 59 additions & 0 deletions otel/docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
services:
# 1. OpenTelemetry Golden Tester
golden:
image: ghcr.io/open-telemetry/opentelemetry-collector-contrib/golden:latest
container_name: golden
ports:
- "5318:4318" #For validation in pipeline
command: [
"--otlp-http-endpoint", "0.0.0.0:4318",
"--otlp-endpoint", "0.0.0.0:4317",
"--ignore-timestamp",
"--ignore-start-timestamp",
"--timeout", "3m",
"--ignore-resource-attribute-value", "service.instance.id",
"--ignore-resource-attribute-value", "host.name",
"--ignore-resource-attribute-value", "host.arch",
"--ignore-resource-attribute-value", "process.pid",
"--ignore-resource-attribute-value", "process.command_line",
"--ignore-resource-attribute-value", "process.command_args",
"--ignore-resource-attribute-value", "process.executable.path",
"--ignore-resource-attribute-value", "process.runtime.description",
"--ignore-resource-attribute-value", "process.runtime.version",
"--ignore-resource-attribute-value", "process.runtime.name",
"--ignore-resource-attribute-value", "os.description",
"--ignore-resource-attribute-value", "os.type",
"--ignore-resource-attribute-value", "telemetry.sdk.version",
"--ignore-resource-attribute-value", "telemetry.distro.name",
"--ignore-resource-attribute-value", "telemetry.distro.version",
"--ignore-resource-attribute-value", "telemetry.sdk.language",
"--ignore-resource-attribute-value", "telemetry.sdk.name",
"--ignore-resource-attribute-value", "service.version",
"--ignore-resource-attribute-value", "service.name",
"--ignore-metric-attribute-value", "service.instance.id",
"--ignore-metric-attribute-value", "service.version",
"--ignore-metric-attribute-value", "service.name",
"--ignore-resource-metrics-order",
"--ignore-scope-metrics-order",
"--ignore-metrics-order",
"--ignore-metrics-data-points-order",
"--ignore-metric-values",
"--ignore-data-points-attributes-order",
"--ignore-scope-version",
"--expected", "/var/data/expected.yaml",
# "--write-expected" # generates the expected definition file from received data
]
volumes:
- ./otel-golden:/var/data

# 2. OpenTelemetry Collector
collector:
image: ghcr.io/open-telemetry/opentelemetry-collector-releases/opentelemetry-collector-contrib:latest
container_name: collector
ports:
- "4317:4317" # OTLP gRPC input
- "4318:4318" # OTLP HTTP input
volumes:
- ./otel-collector:/etc/otel-collector
command:
- "--config=/etc/otel-collector/config.yaml"
96 changes: 96 additions & 0 deletions otel/otel-collector/config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
receivers:
# OTLP receiver to receive telemetry data from services
otlp:
protocols:
grpc:
endpoint: 0.0.0.0:4317
http:
endpoint: 0.0.0.0:4318

exporters:
# signals (received and exported) are saved to files
file/metrics:
path: /etc/otel-collector/metrics.json
append: false
format: json
file/metrics_filtered:
path: /etc/otel-collector/metrics_filtered.json
append: false
format: json
file/traces:
path: /etc/otel-collector/traces.json
append: false
format: json
file/logs:
path: /etc/otel-collector/logs.json
append: false
format: json
# debug printed to the console when added to exporters
debug:
verbosity: detailed
# otlp-http exporter to forward telemetry to the Golden Tester
otlphttp/golden:
endpoint: "http://golden:4318"
tls:
insecure: true
nop: {}

# The OpenTelemetry Golden Tester validates all received data,
# which requires all data to be described in golden expected definitions file.
# The metrics are filtered so only one metric is produced for validation.
processors:
# All metrics except jvm.cpu.count are ignored.
# The metric is expected to be always produced as we run java application.
filter/keep_specific_metrics:
error_mode: ignore
metrics:
include:
match_type: strict
metric_names:
- jvm.cpu.count

# Every metric is converted to test.metric:
# - The name reflects this is purely artificial metrics for testing
# - Is converted to gauge as the gauge carries fewer attributes which simplifies the validation
# - The value is always 1.0 so it is easy to define the expected value for validation
transform/all_in_one:
error_mode: ignore
metric_statements:
- context: metric
statements:
- set(name, "test.metric")
- set(description, "Synthetic metric for resource attribute validation carrying fixed dummy value")
- set(instrumentation_scope.name, "test")
- set(unit, "")
- convert_sum_to_gauge()
- context: datapoint
statements:
- set(value_double, 1.0)

service:
telemetry:
metrics:
level: none # Disables generation of internal otelcol_ metrics
pipelines:
traces:
# received traces are exported to a file only (add the debug exporter for console output)
receivers: [otlp]
processors: []
exporters: [file/traces]
# received metrics (unfiltered) are exported to a file only
metrics:
receivers: [otlp]
processors: []
exporters: [file/metrics]
# Received metrics are filtered and transformed. For any number of received metrics, at most one will be exported
# to the file and to the Golden Tester.
metrics/filtered:
receivers: [ otlp ]
processors: [ filter/keep_specific_metrics, transform/all_in_one ]
exporters: [ file/metrics_filtered, otlphttp/golden ]
# received logs are exported to a file only (add the debug exporter for console output)
logs:
receivers: [otlp]
processors: []
exporters: [file/logs]

15 changes: 15 additions & 0 deletions otel/otel-golden/expected.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
resourceMetrics:
- resource:
attributes:
- key: deployment.environment
value: { stringValue: "dev" }
- key: mainframe.lpar.name
value: { stringValue: "LPAR01" }
- key: zos.smf.id
value: { stringValue: "SYS1" }
- key: zos.sysplex.name
value: { stringValue: "SYSPLEX1" }
schemaUrl: "https://opentelemetry.io/schemas/1.24.0"
scopeMetrics:
- scope: { "name": "test" }
metrics: [ { "name": "test.metric", "description": "Synthetic metric for resource attribute validation carrying fixed dummy value", "gauge": {"dataPoints": [{"asDouble": 1}]} } ]
Loading
Loading