From 7a8dcd7682cffd68a375dc52ddcb0029e61defc2 Mon Sep 17 00:00:00 2001 From: Christian Heimes Date: Thu, 26 Jun 2025 08:32:47 +0200 Subject: [PATCH] Increase logging for external commands The `external_commands.run()` function now logs all environment variables. Before it only logged extra environment variables defined in a plugin. The logs were missing information about global environment variables, e.g. env vars defined in a build container. These additional env vars influence a build, too. The run log file now contains the command, env vars, and current working directory, too. Potentially sensitive env vars are masked. This is an additional precaution. GitHub and GitLab already mask sensitive values in logs. Signed-off-by: Christian Heimes --- src/fromager/external_commands.py | 33 ++++++++++++++++++++++++------ tests/test_external_commands.py | 34 ++++++++++++++++++++++++++++--- 2 files changed, 58 insertions(+), 9 deletions(-) diff --git a/src/fromager/external_commands.py b/src/fromager/external_commands.py index 3ab57482..6f0f25ae 100644 --- a/src/fromager/external_commands.py +++ b/src/fromager/external_commands.py @@ -1,6 +1,7 @@ import logging import os import pathlib +import re import shlex import subprocess import sys @@ -46,6 +47,23 @@ class NetworkIsolationError(subprocess.CalledProcessError): pass +# *PAT (GitLab), SECRET, PASSWORD, PASSPHRASE, CRED(entials), TOKEN +SENSITIVE_KEYS = re.compile( + "^(*.PAT|.*SECRET.*|.*PASSWORD.*|.*PASSPHRASE.*|.*CRED.*|.*TOKEN.*)$", + re.IGNORECASE, +) + + +def mask_envvars(env: dict[str, typing.Any]) -> typing.Iterator[tuple[str, str]]: + """Mask sensistive env keys, shlex quote others""" + for key, value in sorted(env.items()): + if SENSITIVE_KEYS.match(key): + value = "'[MASKED]'" + else: + value = shlex.quote(str(value)) + yield key, value + + # based on pyproject_hooks/_impl.py: quiet_subprocess_runner def run( cmd: typing.Sequence[str], @@ -70,14 +88,17 @@ def run( *cmd, ] - logger.debug( - "running: %s %s in %s", - " ".join(f"{k}={shlex.quote(v)}" for k, v in extra_environ.items()), - " ".join(shlex.quote(str(s)) for s in cmd), - cwd or ".", - ) + cmd_str = " ".join(shlex.quote(str(s)) for s in cmd) + env_str = " ".join(f"{k}={v}" for k, v in mask_envvars(env)) + cwd_str = cwd or os.path.abspath(os.getcwd()) + + logger.debug("running: %s %s in %s", cmd_str, env_str, cwd_str) if log_filename: with open(log_filename, "w") as log_file: + print(f"cmd: {cmd_str}", file=log_file) + print(f"env: {env_str}", file=log_file) + print(f"cwd: {cwd_str}", file=log_file) + log_file.flush() completed = subprocess.run( cmd, cwd=cwd, diff --git a/tests/test_external_commands.py b/tests/test_external_commands.py index 84d9cdac..ee6c4a6c 100644 --- a/tests/test_external_commands.py +++ b/tests/test_external_commands.py @@ -1,5 +1,6 @@ import os import pathlib +import shlex import subprocess import typing from unittest import mock @@ -8,6 +9,25 @@ from fromager import external_commands +MASKED = "'[MASKED]'" + + +@pytest.mark.parametrize( + "key,value,expected_value", + [ + ("KEY", "value", "value"), + ("KEY", "value;value", "'value;value'"), + ("PATH", "/bin:/sbin", "/bin:/sbin"), + ("BOT_PAT", "value", MASKED), + ("MY_SECRET", "value", MASKED), + ("MY_CREDENTIALS", "value", MASKED), + ("SOME_PASSWORD_VALUE", "value", MASKED), + ], +) +def test_mask_envvars(key: str, value: str, expected_value: str): + out = dict(external_commands.mask_envvars({key: value})) + assert out == {key: expected_value} + def test_external_commands_environ(): env = {"BLAH": "test"} @@ -18,15 +38,23 @@ def test_external_commands_environ(): def test_external_commands_log_file(tmp_path): log_filename = pathlib.Path(tmp_path) / "test.log" env = {"BLAH": "test"} + cmd = ["sh", "-c", "echo $BLAH"] output = external_commands.run( - ["sh", "-c", "echo $BLAH"], + cmd, extra_environ=env, log_filename=log_filename, ) - assert "test\n" == output + assert output.endswith("test\n") + assert log_filename.exists() file_contents = log_filename.read_text() - assert "test\n" == file_contents + assert file_contents == output + + file_lines = file_contents.split("\n") + assert file_lines[0] == f"cmd: {shlex.join(cmd)}" + assert file_lines[1].startswith("env:") + assert file_lines[2].startswith("cwd:") + assert file_lines[3] == "test" @mock.patch("subprocess.run", return_value=mock.Mock(returncode=0))