From 241fed5429e37d7b87d91cfc67f53eebce663a47 Mon Sep 17 00:00:00 2001 From: yhaliaw Date: Tue, 14 Apr 2026 14:23:14 +0800 Subject: [PATCH 01/15] Change runner to connect to otel collector on pre-job --- charmcraft.yaml | 6 +++ .../configuration/base.py | 10 ++++ .../openstack_runner_manager.py | 4 ++ .../templates/pre-job.j2 | 43 +++++++++++++++++ src/charm_state.py | 37 +++++++++++++++ src/factories.py | 1 + tests/unit/conftest.py | 1 + tests/unit/test_charm_state.py | 46 +++++++++++++++++++ tests/unit/test_factories.py | 21 +++++++++ 9 files changed, 169 insertions(+) diff --git a/charmcraft.yaml b/charmcraft.yaml index 236385f112..1e8f4698f0 100644 --- a/charmcraft.yaml +++ b/charmcraft.yaml @@ -225,6 +225,12 @@ config: description: >- The log level for the runner manager application. The value can be CRITICAL, FATAL, ERROR, WARNING, INFO, or DEBUG. + otel-collector-endpoint: + type: string + default: "" + description: >- + The endpoint to send OpenTelemetry traces to in the format "host:port". If not set, OpenTelemetry + will be disabled. actions: check-runners: diff --git a/github-runner-manager/src/github_runner_manager/configuration/base.py b/github-runner-manager/src/github_runner_manager/configuration/base.py index 29eb1f69a6..48435f6886 100644 --- a/github-runner-manager/src/github_runner_manager/configuration/base.py +++ b/github-runner-manager/src/github_runner_manager/configuration/base.py @@ -103,6 +103,7 @@ class SupportServiceConfig(BaseModel): dockerhub_mirror: str | None ssh_debug_connections: "list[SSHDebugConnection]" custom_pre_job_script: str | None + otel_collector_config: Optional["OtelCollectorConfig"] = None @root_validator(pre=False, skip_on_failure=True) @classmethod @@ -126,6 +127,15 @@ def check_use_aproxy(cls, values: dict) -> dict: raise ValueError("aproxy requires the runner http or https to be set") return values +class OtelCollectorConfig(BaseModel): + """Configuration for OpenTelemetry collector. 
+ + Attributes: + host: The OpenTelemetry collector hostname. + port: The OpenTelemetry collector port. + """ + host: str + port: int = Field(0, gt=0, le=65535) class ProxyConfig(BaseModel): """Proxy configuration. diff --git a/github-runner-manager/src/github_runner_manager/openstack_cloud/openstack_runner_manager.py b/github-runner-manager/src/github_runner_manager/openstack_cloud/openstack_runner_manager.py index 26075b053c..92ae0ed3b4 100644 --- a/github-runner-manager/src/github_runner_manager/openstack_cloud/openstack_runner_manager.py +++ b/github-runner-manager/src/github_runner_manager/openstack_cloud/openstack_runner_manager.py @@ -175,6 +175,10 @@ def _generate_cloud_init(self, runner_context: RunnerContext) -> str: dockerhub_mirror=service_config.dockerhub_mirror or "", ssh_debug_info=ssh_debug_info, tmate_server_proxy=runner_http_proxy, + otel_collector_endpoint=( + f"{service_config.otel_collector_config.host}:{service_config.otel_collector_config.port}" + if service_config.otel_collector_config else "" + ), ) pre_job_contents_dict = { "issue_metrics": True, diff --git a/github-runner-manager/src/github_runner_manager/templates/pre-job.j2 b/github-runner-manager/src/github_runner_manager/templates/pre-job.j2 index 54725fa1cd..89e21d3a5c 100644 --- a/github-runner-manager/src/github_runner_manager/templates/pre-job.j2 +++ b/github-runner-manager/src/github_runner_manager/templates/pre-job.j2 @@ -131,6 +131,49 @@ jq -n \ logger -s "Contributor check passed - proceeding to execute jobs" {% endif %} +# Setup the OpenTelemetry collector configurations. +{% if otel_collector_endpoint %} +/usr/bin/sudo /usr/bin/tee /etc/otelcol/config.d/github.yaml < list[SSHDebugCon ) return ssh_debug_connections +def _build_otel_collector_config_from_charm(charm: CharmBase) -> OtelCollectorConfig | None: + """Initialize the OtelCollectorConfig from charm configuration. + + Args: + charm: The charm instance. 
+ + Returns: + OtelCollectorConfig if endpoint config is set; otherwise None. + """ + endpoint = cast(str, charm.config.get(OTEL_COLLECTOR_ENDPOINT_CONFIG_NAME, "")) + if not endpoint: + return None + + parsed_endpoint = urlsplit(f"//{endpoint}") + if not parsed_endpoint.hostname or parsed_endpoint.port is None: + raise CharmConfigInvalidError( + f"Invalid {OTEL_COLLECTOR_ENDPOINT_CONFIG_NAME} config, expected host:port" + ) + + if parsed_endpoint.username or parsed_endpoint.password or parsed_endpoint.path: + raise CharmConfigInvalidError( + f"Invalid {OTEL_COLLECTOR_ENDPOINT_CONFIG_NAME} config, expected host:port" + ) + + return OtelCollectorConfig(host=parsed_endpoint.hostname, port=parsed_endpoint.port) + def _build_planner_config_from_charm(charm: CharmBase) -> PlannerConfig | None: """Initialize planner endpoint and token from relation data. @@ -851,6 +879,7 @@ class CharmState: # pylint: disable=too-many-instance-attributes charm_config: CharmConfig runner_config: OpenstackRunnerConfig ssh_debug_connections: list[SSHDebugConnection] + otel_collector_config: OtelCollectorConfig | None planner_config: PlannerConfig | None @classmethod @@ -869,6 +898,7 @@ def _store_state(cls, state: "CharmState") -> None: state_dict["ssh_debug_connections"] = [ debug_info.json() for debug_info in state_dict["ssh_debug_connections"] ] + state_dict["otel_collector_config"] = json.loads(state_dict["otel_collector_config"].json()) if state_dict["otel_collector_config"] else None json_data = json.dumps(state_dict, ensure_ascii=False) CHARM_STATE_PATH.write_text(json_data, encoding="utf-8") @@ -921,6 +951,12 @@ def from_charm(cls, charm: CharmBase) -> "CharmState": # noqa: C901 logger.error("Invalid SSH debug info: %s.", exc) raise CharmConfigInvalidError("Invalid SSH Debug info") from exc + try: + otel_collector_config = _build_otel_collector_config_from_charm(charm) + except (ValidationError, ValueError) as exc: + logger.error("Invalid OpenTelemetry collector config: %s.", exc) + 
raise CharmConfigInvalidError("Invalid OpenTelemetry collector config") from exc + planner_config = _build_planner_config_from_charm(charm) state = cls( @@ -930,6 +966,7 @@ def from_charm(cls, charm: CharmBase) -> "CharmState": # noqa: C901 charm_config=charm_config, runner_config=runner_config, ssh_debug_connections=ssh_debug_connections, + otel_collector_config=otel_collector_config, planner_config=planner_config, ) diff --git a/src/factories.py b/src/factories.py index ae34da9686..acb767a985 100644 --- a/src/factories.py +++ b/src/factories.py @@ -53,6 +53,7 @@ def create_application_configuration( runner_proxy_config=state.runner_proxy_config, dockerhub_mirror=state.charm_config.dockerhub_mirror, ssh_debug_connections=state.ssh_debug_connections, + otel_collector_config=state.otel_collector_config, use_aproxy=state.charm_config.use_aproxy, aproxy_exclude_addresses=state.charm_config.aproxy_exclude_addresses, aproxy_redirect_ports=state.charm_config.aproxy_redirect_ports, diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py index 3aaf3f1c8b..8ef6020cac 100644 --- a/tests/unit/conftest.py +++ b/tests/unit/conftest.py @@ -188,5 +188,6 @@ def complete_charm_state_fixture(): ed25519_fingerprint="SHA256:ed25519", ), ], + otel_collector_config=None, planner_config=None, ) diff --git a/tests/unit/test_charm_state.py b/tests/unit/test_charm_state.py index 6e17080f35..de73494aa7 100644 --- a/tests/unit/test_charm_state.py +++ b/tests/unit/test_charm_state.py @@ -29,6 +29,7 @@ LABELS_CONFIG_NAME, MANAGER_SSH_PROXY_COMMAND_CONFIG_NAME, MAX_TOTAL_VIRTUAL_MACHINES_CONFIG_NAME, + OTEL_COLLECTOR_ENDPOINT_CONFIG_NAME, OPENSTACK_CLOUDS_YAML_CONFIG_NAME, OPENSTACK_FLAVOR_CONFIG_NAME, PATH_CONFIG_NAME, @@ -50,6 +51,7 @@ PlannerConfig, ProxyConfig, SSHDebugConnection, + _build_otel_collector_config_from_charm, _build_planner_config_from_charm, ) from tests.unit.factories import MockGithubRunnerCharmFactory @@ -1151,3 +1153,47 @@ def 
test_invalid_aproxy_config_in_charm_state( with pytest.raises(CharmConfigInvalidError): CharmState.from_charm(mock_charm) + + +def test_build_otel_collector_config_from_charm_not_set() -> None: + """ + arrange: Mock CharmBase without otel collector endpoint. + act: Build otel collector config. + assert: None is returned. + """ + mock_charm = MockGithubRunnerCharmFactory() + + otel_collector_config = _build_otel_collector_config_from_charm(mock_charm) + + assert otel_collector_config is None + + +@pytest.mark.parametrize("hostname, port", [("10.10.0.12", 42), ("mock_hostname", 823)]) +def test_build_otel_collector_config_from_charm_valid_endpoint(hostname: str, port: int) -> None: + """ + arrange: Mock CharmBase with a valid otel collector endpoint. + act: Build otel collector config. + assert: Parsed host and port are returned. + """ + mock_charm = MockGithubRunnerCharmFactory() + mock_charm.config[OTEL_COLLECTOR_ENDPOINT_CONFIG_NAME] = f"{hostname}:{port}" + + otel_collector_config = _build_otel_collector_config_from_charm(mock_charm) + + assert otel_collector_config is not None + assert str(otel_collector_config.host) == hostname + assert otel_collector_config.port == port + + +@pytest.mark.parametrize("endpoint", ["10.10.0.12", "10.10.0.12:", "http://10.10.0.12:4317", "fake_hostname"]) +def test_build_otel_collector_config_from_charm_invalid_endpoint(endpoint: str) -> None: + """ + arrange: Mock CharmBase with malformed endpoint formats. + act: Build otel collector config. + assert: CharmConfigInvalidError is raised. 
+ """ + mock_charm = MockGithubRunnerCharmFactory() + mock_charm.config[OTEL_COLLECTOR_ENDPOINT_CONFIG_NAME] = endpoint + + with pytest.raises(CharmConfigInvalidError): + _build_otel_collector_config_from_charm(mock_charm) diff --git a/tests/unit/test_factories.py b/tests/unit/test_factories.py index 3b3cbc1548..ec88294a52 100644 --- a/tests/unit/test_factories.py +++ b/tests/unit/test_factories.py @@ -7,6 +7,7 @@ ApplicationConfiguration, Flavor, Image, + OtelCollectorConfig, ProxyConfig, RunnerCombination, RunnerConfiguration, @@ -149,6 +150,26 @@ def test_create_application_configuration_with_planner( assert app_configuration.planner_token == "planner-token-value" +def test_create_application_configuration_with_otel_collector_config( + complete_charm_state: charm_state.CharmState, +): + """ + arrange: Prepare CharmState with otel collector config. + act: Call create_application_configuration. + assert: The service config contains the collector endpoint. + """ + state = dataclasses.replace( + complete_charm_state, + otel_collector_config=OtelCollectorConfig(host="10.10.0.12", port=4317), + ) + + app_configuration = factories.create_application_configuration(state, "app_name", "unit_name") + + assert app_configuration.service_config.otel_collector_config is not None + assert str(app_configuration.service_config.otel_collector_config.host) == "10.10.0.12" + assert app_configuration.service_config.otel_collector_config.port == 4317 + + @pytest.mark.parametrize( "charm_config_updates, expected_auth", [ From 6b39a3e034965bdec61ef4bfdf32d423a7af2fd6 Mon Sep 17 00:00:00 2001 From: yhaliaw Date: Tue, 14 Apr 2026 14:40:51 +0800 Subject: [PATCH 02/15] Fix incorrect passing of otel collector endpoint to pre-job --- .../src/github_runner_manager/configuration/base.py | 3 +++ .../openstack_cloud/openstack_runner_manager.py | 10 ++++++---- .../openstack_cloud/test_openstack_runner_manager.py | 12 ++++++++---- src/charm_state.py | 7 ++++++- tests/unit/test_charm_state.py | 6 
++++-- 5 files changed, 27 insertions(+), 11 deletions(-) diff --git a/github-runner-manager/src/github_runner_manager/configuration/base.py b/github-runner-manager/src/github_runner_manager/configuration/base.py index 48435f6886..fd26e928c2 100644 --- a/github-runner-manager/src/github_runner_manager/configuration/base.py +++ b/github-runner-manager/src/github_runner_manager/configuration/base.py @@ -127,6 +127,7 @@ def check_use_aproxy(cls, values: dict) -> dict: raise ValueError("aproxy requires the runner http or https to be set") return values + class OtelCollectorConfig(BaseModel): """Configuration for OpenTelemetry collector. @@ -134,9 +135,11 @@ class OtelCollectorConfig(BaseModel): host: The OpenTelemetry collector hostname. port: The OpenTelemetry collector port. """ + host: str port: int = Field(0, gt=0, le=65535) + class ProxyConfig(BaseModel): """Proxy configuration. diff --git a/github-runner-manager/src/github_runner_manager/openstack_cloud/openstack_runner_manager.py b/github-runner-manager/src/github_runner_manager/openstack_cloud/openstack_runner_manager.py index 92ae0ed3b4..3763780d0d 100644 --- a/github-runner-manager/src/github_runner_manager/openstack_cloud/openstack_runner_manager.py +++ b/github-runner-manager/src/github_runner_manager/openstack_cloud/openstack_runner_manager.py @@ -175,10 +175,7 @@ def _generate_cloud_init(self, runner_context: RunnerContext) -> str: dockerhub_mirror=service_config.dockerhub_mirror or "", ssh_debug_info=ssh_debug_info, tmate_server_proxy=runner_http_proxy, - otel_collector_endpoint=( - f"{service_config.otel_collector_config.host}:{service_config.otel_collector_config.port}" - if service_config.otel_collector_config else "" - ), + otel_collector_endpoint=(), ) pre_job_contents_dict = { "issue_metrics": True, @@ -186,6 +183,11 @@ def _generate_cloud_init(self, runner_context: RunnerContext) -> str: "do_repo_policy_check": False, "custom_pre_job_script": service_config.custom_pre_job_script, 
"allow_external_contributor": self._config.allow_external_contributor, + "otel_collector_endpoint": ( + f"{service_config.otel_collector_config.host}:{service_config.otel_collector_config.port}" + if service_config.otel_collector_config + else "" + ), } pre_job_contents = jinja.get_template("pre-job.j2").render(pre_job_contents_dict) diff --git a/github-runner-manager/tests/unit/openstack_cloud/test_openstack_runner_manager.py b/github-runner-manager/tests/unit/openstack_cloud/test_openstack_runner_manager.py index 9f341b35b3..3f70134082 100644 --- a/github-runner-manager/tests/unit/openstack_cloud/test_openstack_runner_manager.py +++ b/github-runner-manager/tests/unit/openstack_cloud/test_openstack_runner_manager.py @@ -91,7 +91,8 @@ def runner_metrics_mock_fixture(monkeypatch: pytest.MonkeyPatch) -> MagicMock: ["80", "443"], ["10.0.0.0/8", "172.16.0.0/12", "192.168.0.0/16"], True, - textwrap.dedent("""\ + textwrap.dedent( + """\ table ip aproxy { set exclude { type ipv4_addr; @@ -107,14 +108,16 @@ def runner_metrics_mock_fixture(monkeypatch: pytest.MonkeyPatch) -> MagicMock: ip daddr != @exclude tcp dport { 80, 443 } counter dnat to \\$default-ipv4:54969 } } - """), + """ + ), id="aproxy default config", ), pytest.param( ["80", "443"], [], True, - textwrap.dedent("""\ + textwrap.dedent( + """\ table ip aproxy { set exclude { type ipv4_addr; @@ -130,7 +133,8 @@ def runner_metrics_mock_fixture(monkeypatch: pytest.MonkeyPatch) -> MagicMock: ip daddr != @exclude tcp dport { 80, 443 } counter dnat to \\$default-ipv4:54969 } } - """), + """ + ), id="aproxy with no aproxy_exclude_addresses", ), ], diff --git a/src/charm_state.py b/src/charm_state.py index 6124edd48e..ed8590eb73 100644 --- a/src/charm_state.py +++ b/src/charm_state.py @@ -786,6 +786,7 @@ def _build_ssh_debug_connection_from_charm(charm: CharmBase) -> list[SSHDebugCon ) return ssh_debug_connections + def _build_otel_collector_config_from_charm(charm: CharmBase) -> OtelCollectorConfig | None: """Initialize 
the OtelCollectorConfig from charm configuration. @@ -898,7 +899,11 @@ def _store_state(cls, state: "CharmState") -> None: state_dict["ssh_debug_connections"] = [ debug_info.json() for debug_info in state_dict["ssh_debug_connections"] ] - state_dict["otel_collector_config"] = json.loads(state_dict["otel_collector_config"].json()) if state_dict["otel_collector_config"] else None + state_dict["otel_collector_config"] = ( + json.loads(state_dict["otel_collector_config"].json()) + if state_dict["otel_collector_config"] + else None + ) json_data = json.dumps(state_dict, ensure_ascii=False) CHARM_STATE_PATH.write_text(json_data, encoding="utf-8") diff --git a/tests/unit/test_charm_state.py b/tests/unit/test_charm_state.py index de73494aa7..755750d376 100644 --- a/tests/unit/test_charm_state.py +++ b/tests/unit/test_charm_state.py @@ -29,9 +29,9 @@ LABELS_CONFIG_NAME, MANAGER_SSH_PROXY_COMMAND_CONFIG_NAME, MAX_TOTAL_VIRTUAL_MACHINES_CONFIG_NAME, - OTEL_COLLECTOR_ENDPOINT_CONFIG_NAME, OPENSTACK_CLOUDS_YAML_CONFIG_NAME, OPENSTACK_FLAVOR_CONFIG_NAME, + OTEL_COLLECTOR_ENDPOINT_CONFIG_NAME, PATH_CONFIG_NAME, PLANNER_INTEGRATION_NAME, RECONCILE_INTERVAL_CONFIG_NAME, @@ -1185,7 +1185,9 @@ def test_build_otel_collector_config_from_charm_valid_endpoint(hostname: str, po assert otel_collector_config.port == port -@pytest.mark.parametrize("endpoint", ["10.10.0.12", "10.10.0.12:", "http://10.10.0.12:4317", "fake_hostname"]) +@pytest.mark.parametrize( + "endpoint", ["10.10.0.12", "10.10.0.12:", "http://10.10.0.12:4317", "fake_hostname"] +) def test_build_otel_collector_config_from_charm_invalid_endpoint(endpoint: str) -> None: """ arrange: Mock CharmBase with malformed endpoint formats. 
From ef56b090cd4fdce785f183604934d6d27f4d459a Mon Sep 17 00:00:00 2001 From: yhaliaw Date: Wed, 15 Apr 2026 10:05:40 +0800 Subject: [PATCH 03/15] Add integration test for otel-collector-endpoint config --- tests/integration/test_charm_runner.py | 45 +++++++++++++++++++++++++- 1 file changed, 44 insertions(+), 1 deletion(-) diff --git a/tests/integration/test_charm_runner.py b/tests/integration/test_charm_runner.py index 93ebf94bf3..af916d6ee3 100644 --- a/tests/integration/test_charm_runner.py +++ b/tests/integration/test_charm_runner.py @@ -12,7 +12,7 @@ from juju.action import Action from juju.application import Application -from charm_state import BASE_VIRTUAL_MACHINES_CONFIG_NAME, CUSTOM_PRE_JOB_SCRIPT_CONFIG_NAME +from charm_state import BASE_VIRTUAL_MACHINES_CONFIG_NAME, CUSTOM_PRE_JOB_SCRIPT_CONFIG_NAME, OTEL_COLLECTOR_ENDPOINT_CONFIG_NAME from tests.integration.helpers.common import ( DISPATCH_TEST_WORKFLOW_FILENAME, DISPATCH_WAIT_TEST_WORKFLOW_FILENAME, @@ -184,3 +184,46 @@ async def test_custom_pre_job_script( logs = get_job_logs(workflow_run.jobs("latest")[0]) assert "SSH config" in logs assert "proxycommand socat - PROXY:squid.internal:%h:%p,proxyport=3128" in logs + + +@pytest.mark.openstack +@pytest.mark.asyncio +@pytest.mark.abort_on_fail +async def test_otel_collector_endpoint_pre_job_installs_config( + app: Application, + github_repository: Repository, + test_github_branch: Branch, + instance_helper: OpenStackInstanceHelper, +) -> None: + """ + arrange: A working application with one runner and otel collector endpoint configured. + act: Dispatch a workflow to run pre-job script. + assert: The workflow writes otel collector config to /etc/otelcol/config.d/github.yaml. 
+ """ + endpoint = "10.10.0.12:4317" + await app.set_config( + { + BASE_VIRTUAL_MACHINES_CONFIG_NAME: "1", + OTEL_COLLECTOR_ENDPOINT_CONFIG_NAME: endpoint, + } + ) + await wait_for_runner_ready(app) + + await dispatch_workflow( + app=app, + branch=test_github_branch, + github_repository=github_repository, + conclusion="success", + workflow_id_or_name=DISPATCH_TEST_WORKFLOW_FILENAME, + dispatch_input={"runner": app.name}, + ) + + exit_code, stdout, stderr = await instance_helper.run_in_instance( + unit=app.units[0], + command="sudo cat /etc/otelcol/config.d/github.yaml", + ) + + assert exit_code == 0, stderr + assert stdout is not None + assert "exporters:" in stdout + assert f"endpoint: {endpoint}" in stdout From 57591cbac5dbf0a9720e09f698aaae9179fb293d Mon Sep 17 00:00:00 2001 From: yhaliaw Date: Thu, 16 Apr 2026 10:54:38 +0800 Subject: [PATCH 04/15] Fix lint issues --- .../src/github_runner_manager/configuration/base.py | 1 + .../openstack_cloud/openstack_runner_manager.py | 6 +++--- .../openstack_cloud/test_openstack_runner_manager.py | 12 ++++-------- src/charm_state.py | 1 + tests/integration/test_charm_runner.py | 6 +++++- 5 files changed, 14 insertions(+), 12 deletions(-) diff --git a/github-runner-manager/src/github_runner_manager/configuration/base.py b/github-runner-manager/src/github_runner_manager/configuration/base.py index fd26e928c2..8976073c48 100644 --- a/github-runner-manager/src/github_runner_manager/configuration/base.py +++ b/github-runner-manager/src/github_runner_manager/configuration/base.py @@ -91,6 +91,7 @@ class SupportServiceConfig(BaseModel): aproxy_redirect_ports: A list of ports to redirect to the aproxy proxy. dockerhub_mirror: The dockerhub mirror to use for runners. ssh_debug_connections: The information on the ssh debug services. + otel_collector_config: The configuration for the OpenTelemetry collector. custom_pre_job_script: The custom pre-job script to run before the job. 
""" diff --git a/github-runner-manager/src/github_runner_manager/openstack_cloud/openstack_runner_manager.py b/github-runner-manager/src/github_runner_manager/openstack_cloud/openstack_runner_manager.py index 3763780d0d..fc7efd38ca 100644 --- a/github-runner-manager/src/github_runner_manager/openstack_cloud/openstack_runner_manager.py +++ b/github-runner-manager/src/github_runner_manager/openstack_cloud/openstack_runner_manager.py @@ -175,8 +175,8 @@ def _generate_cloud_init(self, runner_context: RunnerContext) -> str: dockerhub_mirror=service_config.dockerhub_mirror or "", ssh_debug_info=ssh_debug_info, tmate_server_proxy=runner_http_proxy, - otel_collector_endpoint=(), ) + otel_collector_config = service_config.otel_collector_config pre_job_contents_dict = { "issue_metrics": True, "metrics_exchange_path": str(METRICS_EXCHANGE_PATH), @@ -184,8 +184,8 @@ def _generate_cloud_init(self, runner_context: RunnerContext) -> str: "custom_pre_job_script": service_config.custom_pre_job_script, "allow_external_contributor": self._config.allow_external_contributor, "otel_collector_endpoint": ( - f"{service_config.otel_collector_config.host}:{service_config.otel_collector_config.port}" - if service_config.otel_collector_config + f"{otel_collector_config.host}:{otel_collector_config.port}" + if otel_collector_config else "" ), } diff --git a/github-runner-manager/tests/unit/openstack_cloud/test_openstack_runner_manager.py b/github-runner-manager/tests/unit/openstack_cloud/test_openstack_runner_manager.py index 3f70134082..9f341b35b3 100644 --- a/github-runner-manager/tests/unit/openstack_cloud/test_openstack_runner_manager.py +++ b/github-runner-manager/tests/unit/openstack_cloud/test_openstack_runner_manager.py @@ -91,8 +91,7 @@ def runner_metrics_mock_fixture(monkeypatch: pytest.MonkeyPatch) -> MagicMock: ["80", "443"], ["10.0.0.0/8", "172.16.0.0/12", "192.168.0.0/16"], True, - textwrap.dedent( - """\ + textwrap.dedent("""\ table ip aproxy { set exclude { type ipv4_addr; @@ 
-108,16 +107,14 @@ def runner_metrics_mock_fixture(monkeypatch: pytest.MonkeyPatch) -> MagicMock: ip daddr != @exclude tcp dport { 80, 443 } counter dnat to \\$default-ipv4:54969 } } - """ - ), + """), id="aproxy default config", ), pytest.param( ["80", "443"], [], True, - textwrap.dedent( - """\ + textwrap.dedent("""\ table ip aproxy { set exclude { type ipv4_addr; @@ -133,8 +130,7 @@ def runner_metrics_mock_fixture(monkeypatch: pytest.MonkeyPatch) -> MagicMock: ip daddr != @exclude tcp dport { 80, 443 } counter dnat to \\$default-ipv4:54969 } } - """ - ), + """), id="aproxy with no aproxy_exclude_addresses", ), ], diff --git a/src/charm_state.py b/src/charm_state.py index ebc62b937a..2a1aa527dd 100644 --- a/src/charm_state.py +++ b/src/charm_state.py @@ -870,6 +870,7 @@ class CharmState: # pylint: disable=too-many-instance-attributes runner_proxy_config: Proxy-related configuration for the runner. runner_config: The charm configuration related to runner VM configuration. ssh_debug_connections: SSH debug connections configuration information. + otel_collector_config: OpenTelemetry collector configuration information. planner_config: Planner endpoint and token from relation data. 
""" diff --git a/tests/integration/test_charm_runner.py b/tests/integration/test_charm_runner.py index 66d368bf78..dd1c83095a 100644 --- a/tests/integration/test_charm_runner.py +++ b/tests/integration/test_charm_runner.py @@ -10,7 +10,11 @@ from github.Branch import Branch from github.Repository import Repository -from charm_state import BASE_VIRTUAL_MACHINES_CONFIG_NAME, CUSTOM_PRE_JOB_SCRIPT_CONFIG_NAME, OTEL_COLLECTOR_ENDPOINT_CONFIG_NAME +from charm_state import ( + BASE_VIRTUAL_MACHINES_CONFIG_NAME, + CUSTOM_PRE_JOB_SCRIPT_CONFIG_NAME, + OTEL_COLLECTOR_ENDPOINT_CONFIG_NAME, +) from tests.integration.helpers.common import ( DISPATCH_TEST_WORKFLOW_FILENAME, DISPATCH_WAIT_TEST_WORKFLOW_FILENAME, From 6387748313dcedc0884c2356691db2caa5608cc9 Mon Sep 17 00:00:00 2001 From: yhaliaw Date: Thu, 16 Apr 2026 13:52:27 +0800 Subject: [PATCH 05/15] Create empty otel config file prior to write --- .../src/github_runner_manager/templates/pre-job.j2 | 2 ++ 1 file changed, 2 insertions(+) diff --git a/github-runner-manager/src/github_runner_manager/templates/pre-job.j2 b/github-runner-manager/src/github_runner_manager/templates/pre-job.j2 index 89e21d3a5c..b685041c5f 100644 --- a/github-runner-manager/src/github_runner_manager/templates/pre-job.j2 +++ b/github-runner-manager/src/github_runner_manager/templates/pre-job.j2 @@ -133,6 +133,8 @@ jq -n \ # Setup the OpenTelemetry collector configurations. 
{% if otel_collector_endpoint %} +/usr/bin/sudo /usr/bin/mkdir -p /etc/otelcol/config.d +/usr/bin/sudo /usr/bin/touch /etc/otelcol/config.d/github.yaml /usr/bin/sudo /usr/bin/tee /etc/otelcol/config.d/github.yaml < Date: Thu, 16 Apr 2026 16:16:07 +0800 Subject: [PATCH 06/15] Update the integration test to use jubilant --- tests/integration/test_charm_runner.py | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/tests/integration/test_charm_runner.py b/tests/integration/test_charm_runner.py index dd1c83095a..d2ed7b5459 100644 --- a/tests/integration/test_charm_runner.py +++ b/tests/integration/test_charm_runner.py @@ -193,10 +193,10 @@ def test_custom_pre_job_script( @pytest.mark.openstack -@pytest.mark.asyncio @pytest.mark.abort_on_fail -async def test_otel_collector_endpoint_pre_job_installs_config( - app: Application, +def test_otel_collector_endpoint_pre_job_installs_config( + juju: jubilant.Juju, + app: str, github_repository: Repository, test_github_branch: Branch, instance_helper: OpenStackInstanceHelper, @@ -207,25 +207,26 @@ async def test_otel_collector_endpoint_pre_job_installs_config( assert: The workflow writes otel collector config to /etc/otelcol/config.d/github.yaml. 
""" endpoint = "10.10.0.12:4317" - await app.set_config( - { + juju.config( + app, + values={ BASE_VIRTUAL_MACHINES_CONFIG_NAME: "1", OTEL_COLLECTOR_ENDPOINT_CONFIG_NAME: endpoint, - } + }, ) - await wait_for_runner_ready(app) + wait_for_runner_ready(juju, app) - await dispatch_workflow( - app=app, + dispatch_workflow( + app_name=app, branch=test_github_branch, github_repository=github_repository, conclusion="success", workflow_id_or_name=DISPATCH_TEST_WORKFLOW_FILENAME, - dispatch_input={"runner": app.name}, + dispatch_input={"runner": app}, ) - exit_code, stdout, stderr = await instance_helper.run_in_instance( - unit=app.units[0], + exit_code, stdout, stderr = instance_helper.run_in_instance( + unit_name=f"{app}/0", command="sudo cat /etc/otelcol/config.d/github.yaml", ) From c39464fd8aa5362131abca9523cb4327afaa0c23 Mon Sep 17 00:00:00 2001 From: yhaliaw Date: Fri, 17 Apr 2026 08:33:22 +0800 Subject: [PATCH 07/15] Format --- tests/integration/conftest.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index 15070d0b4e..d21eed78f4 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -581,8 +581,7 @@ def image_builder_fixture( series = dep_ctx.series any_charm_src_overwrite = { - "any_charm.py": textwrap.dedent( - f"""\ + "any_charm.py": textwrap.dedent(f"""\ from any_charm_base import AnyCharmBase class AnyCharm(AnyCharmBase): @@ -595,8 +594,7 @@ def _image_relation_changed(self, event): # Provide mock image relation data event.relation.data[self.unit]['id'] = '{openstack_config.test_image_id}' event.relation.data[self.unit]['tags'] = '{series}, amd64' - """ - ), + """), } logging.info( "Deploying fake image builder via any-charm for image ID %s", @@ -922,8 +920,7 @@ def mock_planner_app(juju: jubilant.Juju, planner_token_secret: str) -> Iterator planner_name = "planner" any_charm_src_overwrite = { - "any_charm.py": textwrap.dedent( - f"""\ + 
"any_charm.py": textwrap.dedent(f"""\ from any_charm_base import AnyCharmBase class AnyCharm(AnyCharmBase): @@ -937,8 +934,7 @@ def __init__(self, *args, **kwargs): def _on_planner_relation_changed(self, event): event.relation.data[self.app]["endpoint"] = "http://mock:8080" event.relation.data[self.app]["token"] = "{planner_token_secret}" - """ - ), + """), } juju.deploy( From d48c71509c62c28c5cfbe0bf57f13ec70b893fe8 Mon Sep 17 00:00:00 2001 From: yhaliaw Date: Wed, 22 Apr 2026 16:52:12 +0800 Subject: [PATCH 08/15] Fix ssh key permission issues --- tests/integration/helpers/openstack.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/integration/helpers/openstack.py b/tests/integration/helpers/openstack.py index cf6f4d19b7..0c06d690a3 100644 --- a/tests/integration/helpers/openstack.py +++ b/tests/integration/helpers/openstack.py @@ -125,10 +125,11 @@ def run_in_instance( exit_code, _, _ = run_in_unit(self.juju, unit_name, f"ls {key_path}") assert exit_code == 0, f"Unable to find key file {key_path}" ssh_cmd = f'ssh -i {key_path} -o "StrictHostKeyChecking no" ubuntu@{ip} {command}' - ssh_cmd_as_ubuntu_user = f"su - ubuntu -c '{ssh_cmd}'" - logging.warning("ssh_cmd: %s", ssh_cmd_as_ubuntu_user) + # The SSH command needs to be run as the manager user to have access to the SSH keys. 
+ ssh_cmd_as_manager_user = f"su - {constants.RUNNER_MANAGER_USER} -c '{ssh_cmd}'" + logging.warning("ssh_cmd: %s", ssh_cmd_as_manager_user) exit_code, stdout, stderr = run_in_unit( - self.juju, unit_name, ssh_cmd_as_ubuntu_user, timeout + self.juju, unit_name, ssh_cmd_as_manager_user, timeout ) logger.info( "Run command '%s' in runner with result %s: '%s' '%s'", From 90de14a5c6b96ca5d4eda28020027ede3767403a Mon Sep 17 00:00:00 2001 From: Andrew Liaw Date: Thu, 23 Apr 2026 08:56:31 +0800 Subject: [PATCH 09/15] Potential fix for pull request finding Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- .../src/github_runner_manager/configuration/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/github-runner-manager/src/github_runner_manager/configuration/base.py b/github-runner-manager/src/github_runner_manager/configuration/base.py index 8976073c48..7b227aff98 100644 --- a/github-runner-manager/src/github_runner_manager/configuration/base.py +++ b/github-runner-manager/src/github_runner_manager/configuration/base.py @@ -138,7 +138,7 @@ class OtelCollectorConfig(BaseModel): """ host: str - port: int = Field(0, gt=0, le=65535) + port: int = Field(gt=0, le=65535) class ProxyConfig(BaseModel): From 9b170ae6537cb68b5d1d345c713e878b7ca1e90c Mon Sep 17 00:00:00 2001 From: yhaliaw Date: Thu, 23 Apr 2026 09:00:06 +0800 Subject: [PATCH 10/15] Update config descirption for otel-collector-endpoint --- charmcraft.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/charmcraft.yaml b/charmcraft.yaml index 170371df10..910e91ae8e 100644 --- a/charmcraft.yaml +++ b/charmcraft.yaml @@ -244,7 +244,7 @@ config: type: string default: "" description: >- - The endpoint to send OpenTelemetry traces to in the format "host:port". If not set, OpenTelemetry + The endpoint to send OpenTelemetry metrics to in the format "host:port". If not set, OpenTelemetry will be disabled. 
actions: From 209eea241cf7e995911fcab908c931b5a06335a0 Mon Sep 17 00:00:00 2001 From: yhaliaw Date: Thu, 23 Apr 2026 09:35:08 +0800 Subject: [PATCH 11/15] Fix the metric labels --- .../src/github_runner_manager/templates/pre-job.j2 | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/github-runner-manager/src/github_runner_manager/templates/pre-job.j2 b/github-runner-manager/src/github_runner_manager/templates/pre-job.j2 index b685041c5f..b1483005d4 100644 --- a/github-runner-manager/src/github_runner_manager/templates/pre-job.j2 +++ b/github-runner-manager/src/github_runner_manager/templates/pre-job.j2 @@ -152,9 +152,12 @@ processors: - key: github_runner action: upsert value: "$RUNNER_NAME" - - key: github_workflow_job + - key: github_workflow action: upsert - value: "$GITHUB_WORKFLOW_JOB" + value: "$GITHUB_WORKFLOW" + - key: github_job + action: upsert + value: "$GITHUB_JOB" - key: github_repository action: upsert value: "$GITHUB_REPOSITORY" From bc6412234e8f22192aaf023b04aecfae6ea7dcc2 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 23 Apr 2026 01:40:56 +0000 Subject: [PATCH 12/15] Reject query and fragment in OTel collector endpoint config Agent-Logs-Url: https://github.com/canonical/github-runner-operator/sessions/a046e899-791c-40b0-95f3-ccfef17c9536 Co-authored-by: yhaliaw <43424755+yhaliaw@users.noreply.github.com> --- src/charm_state.py | 8 +++++++- tests/unit/test_charm_state.py | 10 +++++++++- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/src/charm_state.py b/src/charm_state.py index 762e080ff1..b6ee705cd8 100644 --- a/src/charm_state.py +++ b/src/charm_state.py @@ -861,7 +861,13 @@ def _build_otel_collector_config_from_charm(charm: CharmBase) -> OtelCollectorCo f"Invalid {OTEL_COLLECTOR_ENDPOINT_CONFIG_NAME} config, expected host:port" ) - if parsed_endpoint.username or parsed_endpoint.password or parsed_endpoint.path: + if ( + parsed_endpoint.username + 
or parsed_endpoint.password + or parsed_endpoint.path + or parsed_endpoint.query + or parsed_endpoint.fragment + ): raise CharmConfigInvalidError( f"Invalid {OTEL_COLLECTOR_ENDPOINT_CONFIG_NAME} config, expected host:port" ) diff --git a/tests/unit/test_charm_state.py b/tests/unit/test_charm_state.py index c93f8a03cf..3ebbf05a13 100644 --- a/tests/unit/test_charm_state.py +++ b/tests/unit/test_charm_state.py @@ -1341,7 +1341,15 @@ def test_build_otel_collector_config_from_charm_valid_endpoint(hostname: str, po @pytest.mark.parametrize( - "endpoint", ["10.10.0.12", "10.10.0.12:", "http://10.10.0.12:4317", "fake_hostname"] + "endpoint", + [ + "10.10.0.12", + "10.10.0.12:", + "http://10.10.0.12:4317", + "fake_hostname", + "10.10.0.12:4317?x=y", + "10.10.0.12:4317#x", + ], ) def test_build_otel_collector_config_from_charm_invalid_endpoint(endpoint: str) -> None: """ From c5ed02d18fbd882a5516d1b7f2d4699c047ba67c Mon Sep 17 00:00:00 2001 From: yhaliaw Date: Thu, 23 Apr 2026 15:58:33 +0800 Subject: [PATCH 13/15] Add env var to expose the endpoint of otel exporter --- .../openstack_cloud/openstack_runner_manager.py | 8 +++----- .../src/github_runner_manager/templates/env.j2 | 3 +++ .../src/github_runner_manager/templates/pre-job.j2 | 3 +++ 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/github-runner-manager/src/github_runner_manager/openstack_cloud/openstack_runner_manager.py b/github-runner-manager/src/github_runner_manager/openstack_cloud/openstack_runner_manager.py index fc7efd38ca..a8e3d17f11 100644 --- a/github-runner-manager/src/github_runner_manager/openstack_cloud/openstack_runner_manager.py +++ b/github-runner-manager/src/github_runner_manager/openstack_cloud/openstack_runner_manager.py @@ -170,11 +170,13 @@ def _generate_cloud_init(self, runner_context: RunnerContext) -> str: if service_config.ssh_debug_connections else None ) + otel_collector_endpoint = (f"{otel_collector_config.host}:{otel_collector_config.port}" if otel_collector_config else "") 
env_contents = jinja.get_template("env.j2").render( pre_job_script=str(PRE_JOB_SCRIPT), dockerhub_mirror=service_config.dockerhub_mirror or "", ssh_debug_info=ssh_debug_info, tmate_server_proxy=runner_http_proxy, + otel_collector_endpoint=otel_collector_endpoint, ) otel_collector_config = service_config.otel_collector_config pre_job_contents_dict = { @@ -183,11 +185,7 @@ def _generate_cloud_init(self, runner_context: RunnerContext) -> str: "do_repo_policy_check": False, "custom_pre_job_script": service_config.custom_pre_job_script, "allow_external_contributor": self._config.allow_external_contributor, - "otel_collector_endpoint": ( - f"{otel_collector_config.host}:{otel_collector_config.port}" - if otel_collector_config - else "" - ), + "otel_collector_endpoint": otel_collector_endpoint, } pre_job_contents = jinja.get_template("pre-job.j2").render(pre_job_contents_dict) diff --git a/github-runner-manager/src/github_runner_manager/templates/env.j2 b/github-runner-manager/src/github_runner_manager/templates/env.j2 index 6814afb09d..d210dd396a 100644 --- a/github-runner-manager/src/github_runner_manager/templates/env.j2 +++ b/github-runner-manager/src/github_runner_manager/templates/env.j2 @@ -15,3 +15,6 @@ TMATE_SERVER_HOST={{ssh_debug_info.local_proxy_host}} TMATE_SERVER_PORT={{ssh_debug_info.local_proxy_port}} {% endif %} {% endif %} +{% if otel_collector_endpoint %} +ACTION_OTEL_EXPORTER_OTLP_ENDPOINT={{otel_collector_endpoint}} +{% endif %} \ No newline at end of file diff --git a/github-runner-manager/src/github_runner_manager/templates/pre-job.j2 b/github-runner-manager/src/github_runner_manager/templates/pre-job.j2 index b1483005d4..c7d5f9b9be 100644 --- a/github-runner-manager/src/github_runner_manager/templates/pre-job.j2 +++ b/github-runner-manager/src/github_runner_manager/templates/pre-job.j2 @@ -133,6 +133,9 @@ jq -n \ # Setup the OpenTelemetry collector configurations. 
{% if otel_collector_endpoint %} +/usr/bin/logger -s "OpenTelemetry collector is enabled." +/usr/bin/logger -s "Additional OpenTelemetry collector configuration can be added." +/usr/bin/logger -s "The exporter endpoint is at the environment variable ACTION_OTEL_EXPORTER_OTLP_ENDPOINT." /usr/bin/sudo /usr/bin/mkdir -p /etc/otelcol/config.d /usr/bin/sudo /usr/bin/touch /etc/otelcol/config.d/github.yaml /usr/bin/sudo /usr/bin/tee /etc/otelcol/config.d/github.yaml < Date: Thu, 23 Apr 2026 16:49:46 +0800 Subject: [PATCH 14/15] Fix lints --- .../openstack_cloud/openstack_runner_manager.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/github-runner-manager/src/github_runner_manager/openstack_cloud/openstack_runner_manager.py b/github-runner-manager/src/github_runner_manager/openstack_cloud/openstack_runner_manager.py index a8e3d17f11..d1b1bc0fbf 100644 --- a/github-runner-manager/src/github_runner_manager/openstack_cloud/openstack_runner_manager.py +++ b/github-runner-manager/src/github_runner_manager/openstack_cloud/openstack_runner_manager.py @@ -170,7 +170,12 @@ def _generate_cloud_init(self, runner_context: RunnerContext) -> str: if service_config.ssh_debug_connections else None ) - otel_collector_endpoint = (f"{otel_collector_config.host}:{otel_collector_config.port}" if otel_collector_config else "") + otel_collector_config = service_config.otel_collector_config + otel_collector_endpoint = ( + f"{otel_collector_config.host}:{otel_collector_config.port}" + if otel_collector_config + else "" + ) env_contents = jinja.get_template("env.j2").render( pre_job_script=str(PRE_JOB_SCRIPT), dockerhub_mirror=service_config.dockerhub_mirror or "", @@ -178,7 +183,6 @@ def _generate_cloud_init(self, runner_context: RunnerContext) -> str: tmate_server_proxy=runner_http_proxy, otel_collector_endpoint=otel_collector_endpoint, ) - otel_collector_config = service_config.otel_collector_config pre_job_contents_dict = { "issue_metrics": True,
"metrics_exchange_path": str(METRICS_EXCHANGE_PATH), From 5c0b136d78d6c481ade5fdb6cb98089367f402fe Mon Sep 17 00:00:00 2001 From: yhaliaw Date: Fri, 24 Apr 2026 10:29:14 +0800 Subject: [PATCH 15/15] Add changelog --- docs/changelog.md | 4 ++++ .../src/github_runner_manager/templates/env.j2 | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/docs/changelog.md b/docs/changelog.md index dd6187cd93..efe1af49c0 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -2,6 +2,10 @@ This changelog documents user-relevant changes to the GitHub runner charm. +## 2026-04-27 + +- Added configuration option `otel-collector-endpoint` to enable the otel-collector to export metrics. Setting this configuration option will add the environment variable ACTION_OTEL_EXPORTER_OTLP_ENDPOINT to the runner, which allows users to configure their own metrics to be exported. + ## 2026-04-22 - Removed `KillMode=process` from the runner manager systemd service, restoring the default `control-group` kill mode. This ensures all child processes in the service's cgroup are properly terminated when the service stops, preventing orphaned runner processes. diff --git a/github-runner-manager/src/github_runner_manager/templates/env.j2 b/github-runner-manager/src/github_runner_manager/templates/env.j2 index d210dd396a..ac701ce6bc 100644 --- a/github-runner-manager/src/github_runner_manager/templates/env.j2 +++ b/github-runner-manager/src/github_runner_manager/templates/env.j2 @@ -17,4 +17,4 @@ TMATE_SERVER_PORT={{ssh_debug_info.local_proxy_port}} {% endif %} {% if otel_collector_endpoint %} ACTION_OTEL_EXPORTER_OTLP_ENDPOINT={{otel_collector_endpoint}} -{% endif %} \ No newline at end of file +{% endif %}