Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions charmcraft.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,12 @@ config:
description: >-
The log level for the runner manager application. The value can be CRITICAL, FATAL, ERROR,
WARNING, INFO, or DEBUG.
otel-collector-endpoint:
type: string
default: ""
description: >-
The endpoint to send OpenTelemetry metrics to in the format "host:port". If not set, OpenTelemetry
will be disabled.

actions:
check-runners:
Expand Down
4 changes: 4 additions & 0 deletions docs/changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@

This changelog documents user-relevant changes to the GitHub runner charm.

## 2026-04-27

- Added configuration option `otel-collector-endpoint` to enable the OpenTelemetry collector to export metrics. Setting this configuration option adds the environment variable `ACTION_OTEL_EXPORTER_OTLP_ENDPOINT` to the runner, which allows users to configure their own metrics to be exported.

## 2026-04-22

- Removed `KillMode=process` from the runner manager systemd service, restoring the default `control-group` kill mode. This ensures all child processes in the service's cgroup are properly terminated when the service stops, preventing orphaned runner processes.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ class SupportServiceConfig(BaseModel):
aproxy_redirect_ports: A list of ports to redirect to the aproxy proxy.
dockerhub_mirror: The dockerhub mirror to use for runners.
ssh_debug_connections: The information on the ssh debug services.
otel_collector_config: The configuration for the OpenTelemetry collector.
custom_pre_job_script: The custom pre-job script to run before the job.
"""

Expand All @@ -103,6 +104,7 @@ class SupportServiceConfig(BaseModel):
dockerhub_mirror: str | None
ssh_debug_connections: "list[SSHDebugConnection]"
custom_pre_job_script: str | None
otel_collector_config: Optional["OtelCollectorConfig"] = None

@root_validator(pre=False, skip_on_failure=True)
@classmethod
Expand All @@ -127,6 +129,18 @@ def check_use_aproxy(cls, values: dict) -> dict:
return values


class OtelCollectorConfig(BaseModel):
    """Address of the OpenTelemetry collector that receives runner metrics.

    Attributes:
        host: Hostname or IP address of the OpenTelemetry collector.
        port: TCP port of the OpenTelemetry collector; must be within 1-65535.
    """

    host: str
    port: int = Field(gt=0, le=65535)


class ProxyConfig(BaseModel):
"""Proxy configuration.

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -170,18 +170,26 @@ def _generate_cloud_init(self, runner_context: RunnerContext) -> str:
if service_config.ssh_debug_connections
else None
)
otel_collector_config = service_config.otel_collector_config
otel_collector_endpoint = (
f"{otel_collector_config.host}:{otel_collector_config.port}"
if otel_collector_config
else ""
)
env_contents = jinja.get_template("env.j2").render(
pre_job_script=str(PRE_JOB_SCRIPT),
dockerhub_mirror=service_config.dockerhub_mirror or "",
ssh_debug_info=ssh_debug_info,
tmate_server_proxy=runner_http_proxy,
otel_collector_endpoint=otel_collector_endpoint,
)
pre_job_contents_dict = {
"issue_metrics": True,
"metrics_exchange_path": str(METRICS_EXCHANGE_PATH),
"do_repo_policy_check": False,
"custom_pre_job_script": service_config.custom_pre_job_script,
"allow_external_contributor": self._config.allow_external_contributor,
"otel_collector_endpoint": otel_collector_endpoint,
}

pre_job_contents = jinja.get_template("pre-job.j2").render(pre_job_contents_dict)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,6 @@ TMATE_SERVER_HOST={{ssh_debug_info.local_proxy_host}}
TMATE_SERVER_PORT={{ssh_debug_info.local_proxy_port}}
{% endif %}
{% endif %}
{% if otel_collector_endpoint %}
ACTION_OTEL_EXPORTER_OTLP_ENDPOINT={{otel_collector_endpoint}}
{% endif %}
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,57 @@ jq -n \
logger -s "Contributor check passed - proceeding to execute jobs"
{% endif %}

# Setup the OpenTelemetry collector configurations.
{% if otel_collector_endpoint %}
/usr/bin/logger -s "OpenTelemetry collector is enabled."
/usr/bin/logger -s "Additional OpenTelemetry collector configuration can be added."
/usr/bin/logger -s "The exporter endpoint is at the environment variable ACTION_OTEL_EXPORTER_OTLP_ENDPOINT."
/usr/bin/sudo /usr/bin/mkdir -p /etc/otelcol/config.d
/usr/bin/sudo /usr/bin/touch /etc/otelcol/config.d/github.yaml
# The heredoc delimiter is intentionally unquoted: the shell expands $RUNNER_NAME,
# $GITHUB_WORKFLOW, $GITHUB_JOB and $GITHUB_REPOSITORY when this script runs, so the
# written config carries the values of the current job.
/usr/bin/sudo /usr/bin/tee /etc/otelcol/config.d/github.yaml <<EOF
receivers:
  hostmetrics:
    collection_interval: 10s
    scrapers:
      cpu:
      memory:
      disk:
      filesystem:
      network:
      load:
processors:
  attributes/github_labels:
    actions:
      - key: github_runner
        action: upsert
        value: "$RUNNER_NAME"
      - key: github_workflow
        action: upsert
        value: "$GITHUB_WORKFLOW"
      - key: github_job
        action: upsert
        value: "$GITHUB_JOB"
      - key: github_repository
        action: upsert
        value: "$GITHUB_REPOSITORY"
  batch:
exporters:
  otlp/mimir:
    endpoint: {{ otel_collector_endpoint }}
    tls:
      insecure: true
service:
  pipelines:
    metrics:
      receivers: [hostmetrics]
      processors: [attributes/github_labels, batch]
      exporters: [otlp/mimir]
EOF

# Enable and start the collector snap only after its configuration has been written.
/usr/bin/sudo /usr/bin/snap enable opentelemetry-collector
/usr/bin/sudo /usr/bin/snap start opentelemetry-collector
{% endif %}

if [[ -n "$DOCKERHUB_MIRROR" ]]; then
logger -s "A private docker registry is setup as a dockerhub mirror for this self-hosted runner."
logger -s "The docker daemon on this self-hosted runner is configured to use the dockerhub mirror."
Expand Down
49 changes: 49 additions & 0 deletions src/charm_state.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

import yaml
from github_runner_manager.configuration import ProxyConfig, SSHDebugConnection
from github_runner_manager.configuration.base import OtelCollectorConfig
from github_runner_manager.configuration.github import (
GitHubAppAuth,
GitHubAuth,
Expand Down Expand Up @@ -69,6 +70,7 @@
VIRTUAL_MACHINES_CONFIG_NAME = "virtual-machines"
CUSTOM_PRE_JOB_SCRIPT_CONFIG_NAME = "pre-job-script"
RUNNER_MANAGER_LOG_LEVEL_CONFIG_NAME = "runner-manager-log-level"
OTEL_COLLECTOR_ENDPOINT_CONFIG_NAME = "otel-collector-endpoint"

# Integration names
COS_AGENT_INTEGRATION_NAME = "cos-agent"
Expand Down Expand Up @@ -840,6 +842,39 @@ def _build_ssh_debug_connection_from_charm(charm: CharmBase) -> list[SSHDebugCon
return ssh_debug_connections


def _build_otel_collector_config_from_charm(charm: CharmBase) -> OtelCollectorConfig | None:
    """Initialize the OtelCollectorConfig from charm configuration.

    The configuration value must be a bare "host:port" pair. Anything that parses as
    user info, a path, a query or a fragment is rejected, as is a missing host or port.

    Args:
        charm: The charm instance.

    Raises:
        CharmConfigInvalidError: If the endpoint is set but is not a plain host:port pair,
            including a port that is non-numeric or out of range.

    Returns:
        OtelCollectorConfig if endpoint config is set; otherwise None.
    """
    endpoint = cast(str, charm.config.get(OTEL_COLLECTOR_ENDPOINT_CONFIG_NAME, ""))
    if not endpoint:
        return None

    invalid_endpoint_message = (
        f"Invalid {OTEL_COLLECTOR_ENDPOINT_CONFIG_NAME} config, expected host:port"
    )

    try:
        # The "//" prefix makes urlsplit treat the whole value as a network location, so
        # host and port are parsed by the standard library rather than by hand.
        parsed_endpoint = urlsplit(f"//{endpoint}")
        host = parsed_endpoint.hostname
        # Reading .port raises ValueError for a non-numeric or out-of-range port; report it
        # with the same message as every other malformed endpoint.
        port = parsed_endpoint.port
    except ValueError as exc:
        raise CharmConfigInvalidError(invalid_endpoint_message) from exc

    has_extra_components = any(
        (
            parsed_endpoint.username,
            parsed_endpoint.password,
            parsed_endpoint.path,
            parsed_endpoint.query,
            parsed_endpoint.fragment,
        )
    )
    if not host or port is None or has_extra_components:
        raise CharmConfigInvalidError(invalid_endpoint_message)

    # Model validation errors (for example port 0) propagate to the caller unchanged.
    return OtelCollectorConfig(host=host, port=port)


def _build_planner_config_from_charm(charm: CharmBase) -> PlannerConfig | None:
"""Initialize planner endpoint and token from relation data.

Expand Down Expand Up @@ -896,6 +931,7 @@ class CharmState: # pylint: disable=too-many-instance-attributes
runner_proxy_config: Proxy-related configuration for the runner.
runner_config: The charm configuration related to runner VM configuration.
ssh_debug_connections: SSH debug connections configuration information.
otel_collector_config: OpenTelemetry collector configuration information.
planner_config: Planner endpoint and token from relation data.
"""

Expand All @@ -905,6 +941,7 @@ class CharmState: # pylint: disable=too-many-instance-attributes
charm_config: CharmConfig
runner_config: OpenstackRunnerConfig
ssh_debug_connections: list[SSHDebugConnection]
otel_collector_config: OtelCollectorConfig | None
planner_config: PlannerConfig | None

@classmethod
Expand All @@ -923,6 +960,11 @@ def _store_state(cls, state: "CharmState") -> None:
state_dict["ssh_debug_connections"] = [
debug_info.json() for debug_info in state_dict["ssh_debug_connections"]
]
state_dict["otel_collector_config"] = (
json.loads(state_dict["otel_collector_config"].json())
if state_dict["otel_collector_config"]
else None
)
json_data = json.dumps(state_dict, ensure_ascii=False)
CHARM_STATE_PATH.write_text(json_data, encoding="utf-8")

Expand Down Expand Up @@ -975,6 +1017,12 @@ def from_charm(cls, charm: CharmBase) -> "CharmState": # noqa: C901
logger.error("Invalid SSH debug info: %s.", exc)
raise CharmConfigInvalidError("Invalid SSH Debug info") from exc

try:
otel_collector_config = _build_otel_collector_config_from_charm(charm)
except (ValidationError, ValueError) as exc:
logger.error("Invalid OpenTelemetry collector config: %s.", exc)
raise CharmConfigInvalidError("Invalid OpenTelemetry collector config") from exc

planner_config = _build_planner_config_from_charm(charm)

state = cls(
Expand All @@ -984,6 +1032,7 @@ def from_charm(cls, charm: CharmBase) -> "CharmState": # noqa: C901
charm_config=charm_config,
runner_config=runner_config,
ssh_debug_connections=ssh_debug_connections,
otel_collector_config=otel_collector_config,
planner_config=planner_config,
)

Expand Down
1 change: 1 addition & 0 deletions src/factories.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ def create_application_configuration(
runner_proxy_config=state.runner_proxy_config,
dockerhub_mirror=state.charm_config.dockerhub_mirror,
ssh_debug_connections=state.ssh_debug_connections,
otel_collector_config=state.otel_collector_config,
use_aproxy=state.charm_config.use_aproxy,
aproxy_exclude_addresses=state.charm_config.aproxy_exclude_addresses,
aproxy_redirect_ports=state.charm_config.aproxy_redirect_ports,
Expand Down
7 changes: 4 additions & 3 deletions tests/integration/helpers/openstack.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,10 +125,11 @@ def run_in_instance(
exit_code, _, _ = run_in_unit(self.juju, unit_name, f"ls {key_path}")
assert exit_code == 0, f"Unable to find key file {key_path}"
ssh_cmd = f'ssh -i {key_path} -o "StrictHostKeyChecking no" ubuntu@{ip} {command}'
ssh_cmd_as_ubuntu_user = f"su - ubuntu -c '{ssh_cmd}'"
logging.warning("ssh_cmd: %s", ssh_cmd_as_ubuntu_user)
# The SSH command needs to be run as the manager user to have access to the SSH keys.
ssh_cmd_as_manager_user = f"su - {constants.RUNNER_MANAGER_USER} -c '{ssh_cmd}'"
logging.warning("ssh_cmd: %s", ssh_cmd_as_manager_user)
exit_code, stdout, stderr = run_in_unit(
self.juju, unit_name, ssh_cmd_as_ubuntu_user, timeout
self.juju, unit_name, ssh_cmd_as_manager_user, timeout
)
logger.info(
"Run command '%s' in runner with result %s: '%s' '%s'",
Expand Down
50 changes: 49 additions & 1 deletion tests/integration/test_charm_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,11 @@
from github.Branch import Branch
from github.Repository import Repository

from charm_state import BASE_VIRTUAL_MACHINES_CONFIG_NAME, CUSTOM_PRE_JOB_SCRIPT_CONFIG_NAME
from charm_state import (
BASE_VIRTUAL_MACHINES_CONFIG_NAME,
CUSTOM_PRE_JOB_SCRIPT_CONFIG_NAME,
OTEL_COLLECTOR_ENDPOINT_CONFIG_NAME,
)
from tests.integration.helpers.common import (
DISPATCH_TEST_WORKFLOW_FILENAME,
DISPATCH_WAIT_TEST_WORKFLOW_FILENAME,
Expand Down Expand Up @@ -186,3 +190,47 @@ def test_custom_pre_job_script(
logs = get_job_logs(workflow_run.jobs("latest")[0])
assert "SSH config" in logs
assert "proxycommand socat - PROXY:squid.internal:%h:%p,proxyport=3128" in logs


@pytest.mark.openstack
@pytest.mark.abort_on_fail
def test_otel_collector_endpoint_pre_job_installs_config(
    juju: jubilant.Juju,
    app: str,
    github_repository: Repository,
    test_github_branch: Branch,
    instance_helper: OpenStackInstanceHelper,
) -> None:
    """
    arrange: An application configured with one runner and an otel collector endpoint.
    act: Dispatch a workflow so that the pre-job script runs on the runner.
    assert: The runner has /etc/otelcol/config.d/github.yaml containing the exporter endpoint.
    """
    endpoint = "10.10.0.12:4317"
    charm_config = {
        BASE_VIRTUAL_MACHINES_CONFIG_NAME: "1",
        OTEL_COLLECTOR_ENDPOINT_CONFIG_NAME: endpoint,
    }
    juju.config(app, values=charm_config)
    wait_for_runner_ready(juju, app)

    dispatch_workflow(
        app_name=app,
        branch=test_github_branch,
        github_repository=github_repository,
        conclusion="success",
        workflow_id_or_name=DISPATCH_TEST_WORKFLOW_FILENAME,
        dispatch_input={"runner": app},
    )

    # Read the generated collector configuration back from the runner instance.
    return_code, config_text, error_text = instance_helper.run_in_instance(
        unit_name=f"{app}/0",
        command="sudo cat /etc/otelcol/config.d/github.yaml",
    )

    assert return_code == 0, error_text
    assert config_text is not None
    for expected_fragment in ("exporters:", f"endpoint: {endpoint}"):
        assert expected_fragment in config_text
1 change: 1 addition & 0 deletions tests/unit/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,5 +188,6 @@ def complete_charm_state_fixture():
ed25519_fingerprint="SHA256:ed25519",
),
],
otel_collector_config=None,
planner_config=None,
)
Loading
Loading