From 5dba0f43fde421cce74dfa9b67df133197ad31bf Mon Sep 17 00:00:00 2001 From: Ben Lucas Date: Mon, 2 Mar 2026 17:41:38 -0700 Subject: [PATCH 1/6] feat(health): gate connection info on container health check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a lazy health-check gate that delays showing connection info until Docker confirms the container/service is accepting connections. - Model: add `healthy` Boolean column to DockerChallengeTracker (default False for new inserts; server_default 1 preserves existing rows) - Migration: _ensure_healthy_column() adds the column on plugin upgrade - Functions: add get_container_states(), get_service_states(), and _CachedDockerState (5s TTL) for batched Docker API calls — O(1) calls per poll window regardless of user count - API: DockerStatus.get() checks health via batched state lookup; returns status='starting' (no ports/host) or status='running' (with connection info); cleans up dead tracker entries - Frontend: containerStarting state shows spinner with 3s rapid poll during startup; connection info appears only once status='running' - Tests: 30 new tests covering state mapping, cache TTL, and API endpoint behaviour for starting/running/dead container scenarios --- docker_challenges/__init__.py | 27 ++ docker_challenges/api/api.py | 112 ++++- docker_challenges/assets/view.html | 24 +- docker_challenges/assets/view.js | 34 +- docker_challenges/constants.py | 3 + docker_challenges/functions/general.py | 131 +++++- docker_challenges/models/models.py | 3 + tests/stubs/ctfd_stubs.py | 5 + tests/test_container_health.py | 582 +++++++++++++++++++++++++ 9 files changed, 889 insertions(+), 32 deletions(-) create mode 100644 tests/test_container_health.py diff --git a/docker_challenges/__init__.py b/docker_challenges/__init__.py index 1304a03..42b94e4 100644 --- a/docker_challenges/__init__.py +++ b/docker_challenges/__init__.py @@ -282,8 +282,35 @@ def docker_secrets(): app.register_blueprint(admin_docker_secrets) +def _ensure_healthy_column(app) -> None: + """Add the `healthy` column to docker_challenge_tracker if it doesn't exist. + + Handles upgrades from plugin versions that predate the health-check feature. + Existing rows get server_default=1 (healthy=True) so running containers + remain visible after upgrade. + """ + try: + from sqlalchemy import inspect, text + + inspector = inspect(app.db.engine) + columns = [col["name"] for col in inspector.get_columns("docker_challenge_tracker")] + if "healthy" not in columns: + with app.db.engine.connect() as conn: + conn.execute( + text( + "ALTER TABLE docker_challenge_tracker " + "ADD COLUMN healthy BOOLEAN NOT NULL DEFAULT 1" + ) + ) + conn.commit() + logging.info("docker_challenge_tracker: added 'healthy' column") + except Exception as err: + logging.warning("Could not ensure 'healthy' column on docker_challenge_tracker: %s", err) + + def load(app): app.db.create_all() + _ensure_healthy_column(app) CHALLENGE_CLASSES["docker"] = DockerChallengeType CHALLENGE_CLASSES["docker_service"] = DockerServiceChallengeType diff --git a/docker_challenges/api/api.py b/docker_challenges/api/api.py index 35c5196..2092257 100644 --- a/docker_challenges/api/api.py +++ b/docker_challenges/api/api.py @@ -18,9 +18,11 @@ from ..functions.general import ( create_secret, delete_secret, + get_container_states, get_repositories, get_required_ports, get_secrets, + get_service_states, get_unavailable_ports, is_swarm_mode, ) @@ -310,6 +312,7 @@ def _track_container( instance_id=instance_id, ports=",".join(ports), host=str(docker.hostname).split(":")[0], + healthy=False, ) db.session.add(entry) db.session.commit() @@ -365,40 +368,107 @@ def post(self): }, 201 +def _get_tracker_for_session() -> list: + """Return tracker entries for the current authenticated session.""" + if is_teams_mode(): + session = get_current_team() + return list(DockerChallengeTracker.query.filter_by(team_id=session.id)) + session = get_current_user() + return list(DockerChallengeTracker.query.filter_by(user_id=session.id)) + + +def _is_service_challenge(challenge_id: int) -> bool: + """Check if a challenge ID belongs to a DockerServiceChallenge.""" + challenge = _get_challenge_by_id(challenge_id) + return challenge is not None and challenge.type == "docker_service" + + @active_docker_namespace.route("", methods=["POST", "GET"]) class DockerStatus(Resource): """ The Purpose of this API is to retrieve a public JSON string of all docker containers in use by the current team/user. + + Entries with healthy=False are checked against Docker to determine if the + container/service has started yet. Connection info is only returned once healthy. """ @authed_only def get(self): docker = DockerConfig.query.filter_by(id=1).first() - if is_teams_mode(): - session = get_current_team() - tracker = DockerChallengeTracker.query.filter_by(team_id=session.id) - else: - session = get_current_user() - tracker = DockerChallengeTracker.query.filter_by(user_id=session.id) + tracker = _get_tracker_for_session() + + # State dicts are cached (5s TTL) — cheap to call unconditionally + container_states = get_container_states(docker) + service_states = get_service_states(docker) + + db_dirty = False data = [] - for tracker_entry in tracker: - data.append( - { - "id": tracker_entry.id, - "team_id": tracker_entry.team_id, - "user_id": tracker_entry.user_id, - "challenge_id": tracker_entry.challenge_id, - "docker_image": tracker_entry.docker_image, - "timestamp": tracker_entry.timestamp, - "revert_time": tracker_entry.revert_time, - "instance_id": tracker_entry.instance_id, - "ports": tracker_entry.ports.split(","), - "host": str(docker.hostname).split(":")[0], - } - ) + + for entry in tracker: + if entry.healthy: + data.append(self._build_entry(entry, docker, status="running")) + else: + if self._process_unhealthy_entry( + entry, docker, container_states, service_states, data + ): + db_dirty = True + + if db_dirty: + db.session.commit() + return {"success": True, "data": data} + @staticmethod + def _process_unhealthy_entry( + entry: DockerChallengeTracker, + docker: DockerConfig, + container_states: dict[str, str], + service_states: dict[str, str], + data: list, + ) -> bool: + """Check Docker state for an unhealthy tracker entry and update accordingly. + + Returns True if the database was modified (healthy flag set or entry deleted). + """ + is_service = _is_service_challenge(entry.challenge_id) + state_dict = service_states if is_service else container_states + docker_status = state_dict.get(entry.instance_id, "") + + if docker_status == "running": + entry.healthy = True + data.append(DockerStatus._build_entry(entry, docker, status="running")) + return True + if docker_status == "starting": + data.append(DockerStatus._build_entry(entry, docker, status="starting")) + return False + # "stopped", "unhealthy", "" (not found), or unknown — clean up dead tracker entry + DockerChallengeTracker.query.filter_by(id=entry.id).delete() + return True + + @staticmethod + def _build_entry(entry: DockerChallengeTracker, docker: DockerConfig, status: str) -> dict: + """Build a response dict for a tracker entry. + + When status is 'starting', ports and host are omitted to prevent + showing connection info for a service not yet accepting connections. + """ + base = { + "id": entry.id, + "team_id": entry.team_id, + "user_id": entry.user_id, + "challenge_id": entry.challenge_id, + "docker_image": entry.docker_image, + "timestamp": entry.timestamp, + "revert_time": entry.revert_time, + "instance_id": entry.instance_id, + "status": status, + } + if status == "running": + base["ports"] = entry.ports.split(",") + base["host"] = str(docker.hostname).split(":")[0] + return base + @docker_namespace.route("", methods=["GET"]) class DockerAPI(Resource): diff --git a/docker_challenges/assets/view.html b/docker_challenges/assets/view.html index 17f9822..f703c67 100644 --- a/docker_challenges/assets/view.html +++ b/docker_challenges/assets/view.html @@ -5,13 +5,33 @@ x-data="containerStatus('{{ challenge.docker_image }}', '{{ challenge.id }}')" x-init="init()" > - -
+ +
Start Docker Instance
+ +
+
+
+ Loading... +
+
+ Container starting up... Connection info will appear when ready. +
+
+
+
dict[str, str]: + """Return cached container states, fetching fresh if cache is stale.""" + now = time.monotonic() + if now - cls._container_ts >= cls.TTL_SECONDS: + cls._container_states = _fetch_container_states(docker) + cls._container_ts = now + return cls._container_states + + @classmethod + def get_service_states(cls, docker: DockerConfig) -> dict[str, str]: + """Return cached service states, fetching fresh if cache is stale.""" + now = time.monotonic() + if now - cls._service_ts >= cls.TTL_SECONDS: + cls._service_states = _fetch_service_states(docker) + cls._service_ts = now + return cls._service_states + + +def _fetch_container_states(docker: DockerConfig) -> dict[str, str]: + """Fetch all container states in a single Docker API call. + + Returns: + Dict mapping container ID (full) to health status string: + "running", "starting", "unhealthy", or "stopped". + Returns empty dict on request failure. + """ + r = do_request(docker, "/containers/json?all=1") + if not r: + return {} + + try: + containers = r.json() + except Exception: + return {} + + result: dict[str, str] = {} + for container in containers: + container_id = container.get("Id", "") + if not container_id: + continue + + state = container.get("State", "") + status = container.get("Status", "") + + if state != "running": + result[container_id] = "stopped" + elif "(unhealthy)" in status: + result[container_id] = "unhealthy" + elif "(health: starting)" in status: + result[container_id] = "starting" + else: + # "running", "running (healthy)", or no health info → ready + result[container_id] = "running" + + return result + + +def _fetch_service_states(docker: DockerConfig) -> dict[str, str]: + """Fetch all service states in a single Docker API call via /tasks. + + Returns: + Dict mapping service ID to health status string: "running" or "starting". + Returns empty dict on request failure or non-swarm mode. + """ + r = do_request(docker, "/tasks") + if not r: + return {} + + try: + tasks = r.json() + except Exception: + return {} + + if isinstance(tasks, dict): + # Error response (e.g., not a swarm manager) + return {} + + # Group tasks by service and find the best state per service + service_states: dict[str, str] = {} + for task in tasks: + service_id = task.get("ServiceID", "") + if not service_id: + continue + + task_status = task.get("Status", {}) + task_state = task_status.get("State", "") + + if task_state == "running": + service_states[service_id] = "running" + elif service_id not in service_states: + # Any non-running state maps to "starting" unless we already have "running" + service_states[service_id] = "starting" + + return service_states + + +def get_container_states(docker: DockerConfig) -> dict[str, str]: + """Get health states for all containers, with short-TTL caching. + + Returns: + Dict mapping container ID to status: "running", "starting", "unhealthy", "stopped". + """ + return _CachedDockerState.get_container_states(docker) + + +def get_service_states(docker: DockerConfig) -> dict[str, str]: + """Get health states for all services, with short-TTL caching. + + Returns: + Dict mapping service ID to status: "running" or "starting". + """ + return _CachedDockerState.get_service_states(docker) + + def cleanup_container_on_solve( docker: DockerConfig, user: Any, diff --git a/docker_challenges/models/models.py b/docker_challenges/models/models.py index 6389beb..0be4865 100644 --- a/docker_challenges/models/models.py +++ b/docker_challenges/models/models.py @@ -35,6 +35,9 @@ class DockerChallengeTracker(db.Model): instance_id = db.Column("instance_id", db.String(128), index=True) ports = db.Column("ports", db.String(128), index=True) host = db.Column("host", db.String(128), index=True) + healthy = db.Column( + "healthy", db.Boolean, default=False, server_default=db.text("1"), index=True + ) class DockerConfigForm(BaseForm): diff --git a/tests/stubs/ctfd_stubs.py b/tests/stubs/ctfd_stubs.py index 5b5f34f..6cd6303 100644 --- a/tests/stubs/ctfd_stubs.py +++ b/tests/stubs/ctfd_stubs.py @@ -50,6 +50,11 @@ class _DB: ForeignKey = lambda self=None, *a, **kw: None session = MagicMock() + @staticmethod + def text(value): + """Stub for db.text() used in server_default.""" + return value + db = _DB() diff --git a/tests/test_container_health.py b/tests/test_container_health.py new file mode 100644 index 0000000..d38db55 --- /dev/null +++ b/tests/test_container_health.py @@ -0,0 +1,582 @@ +"""Tests for container health check feature. + +Covers: +- get_container_states() state mapping from Docker /containers/json +- get_service_states() state mapping from Docker /tasks +- _CachedDockerState TTL caching behaviour +- DockerStatus.get() endpoint: starting/running/cleanup behaviour +""" + +from __future__ import annotations + +from unittest.mock import MagicMock, patch + +import pytest +import responses + +from docker_challenges.functions.general import ( + _CachedDockerState, + _fetch_container_states, + _fetch_service_states, + get_container_states, + get_service_states, +) + + +# =========================================================================== +# _fetch_container_states unit tests +# =========================================================================== + + +@pytest.mark.medium +@responses.activate +def test_fetch_container_states_running_no_healthcheck(mock_docker_config): + """Running container without HEALTHCHECK maps to 'running'.""" + responses.add( + responses.GET, + "http://localhost:2375/containers/json?all=1", + json=[{"Id": "abc123", "State": "running", "Status": "Up 5 minutes"}], + status=200, + ) + result = _fetch_container_states(mock_docker_config) + assert result == {"abc123": "running"} + + +@pytest.mark.medium +@responses.activate +def test_fetch_container_states_running_healthy(mock_docker_config): + """Running container with (healthy) status maps to 'running'.""" + responses.add( + responses.GET, + "http://localhost:2375/containers/json?all=1", + json=[{"Id": "abc123", "State": "running", "Status": "Up 5 minutes (healthy)"}], + status=200, + ) + result = _fetch_container_states(mock_docker_config) + assert result == {"abc123": "running"} + + +@pytest.mark.medium +@responses.activate +def test_fetch_container_states_running_health_starting(mock_docker_config): + """Running container with (health: starting) status maps to 'starting'.""" + responses.add( + responses.GET, + "http://localhost:2375/containers/json?all=1", + json=[{"Id": "abc123", "State": "running", "Status": "Up 2 seconds (health: starting)"}], + status=200, + ) + result = _fetch_container_states(mock_docker_config) + assert result == {"abc123": "starting"} + + +@pytest.mark.medium +@responses.activate +def test_fetch_container_states_running_unhealthy(mock_docker_config): + """Running container with (unhealthy) status maps to 'unhealthy'.""" + responses.add( + responses.GET, + "http://localhost:2375/containers/json?all=1", + json=[{"Id": "abc123", "State": "running", "Status": "Up 10 minutes (unhealthy)"}], + status=200, + ) + result = _fetch_container_states(mock_docker_config) + assert result == {"abc123": "unhealthy"} + + +@pytest.mark.medium +@responses.activate +def test_fetch_container_states_exited_container(mock_docker_config): + """Exited container maps to 'stopped'.""" + responses.add( + responses.GET, + "http://localhost:2375/containers/json?all=1", + json=[{"Id": "abc123", "State": "exited", "Status": "Exited (0) 1 hour ago"}], + status=200, + ) + result = _fetch_container_states(mock_docker_config) + assert result == {"abc123": "stopped"} + + +@pytest.mark.medium +@responses.activate +def test_fetch_container_states_multiple_containers(mock_docker_config): + """Multiple containers are all correctly mapped.""" + responses.add( + responses.GET, + "http://localhost:2375/containers/json?all=1", + json=[ + {"Id": "id1", "State": "running", "Status": "Up 1 minute"}, + {"Id": "id2", "State": "running", "Status": "Up 2 seconds (health: starting)"}, + {"Id": "id3", "State": "exited", "Status": "Exited (1) 5 minutes ago"}, + ], + status=200, + ) + result = _fetch_container_states(mock_docker_config) + assert result == {"id1": "running", "id2": "starting", "id3": "stopped"} + + +@pytest.mark.medium +def test_fetch_container_states_returns_empty_on_failure(mock_docker_config): + """Returns empty dict when Docker API is unreachable.""" + mock_docker_config.hostname = "" + result = _fetch_container_states(mock_docker_config) + assert result == {} + + +# =========================================================================== +# _fetch_service_states unit tests +# =========================================================================== + + +@pytest.mark.medium +@responses.activate +def test_fetch_service_states_running_task(mock_docker_config): + """Service with a running task maps to 'running'.""" + responses.add( + responses.GET, + "http://localhost:2375/tasks", + json=[ + {"ServiceID": "svc1", "Status": {"State": "running"}}, + ], + status=200, + ) + result = _fetch_service_states(mock_docker_config) + assert result == {"svc1": "running"} + + +@pytest.mark.medium +@responses.activate +def test_fetch_service_states_preparing_task(mock_docker_config): + """Service with only a preparing task maps to 'starting'.""" + responses.add( + responses.GET, + "http://localhost:2375/tasks", + json=[ + {"ServiceID": "svc1", "Status": {"State": "preparing"}}, + ], + status=200, + ) + result = _fetch_service_states(mock_docker_config) + assert result == {"svc1": "starting"} + + +@pytest.mark.medium +@responses.activate +def test_fetch_service_states_running_beats_starting(mock_docker_config): + """Service with both running and preparing tasks reports 'running'.""" + responses.add( + responses.GET, + "http://localhost:2375/tasks", + json=[ + {"ServiceID": "svc1", "Status": {"State": "preparing"}}, + {"ServiceID": "svc1", "Status": {"State": "running"}}, + ], + status=200, + ) + result = _fetch_service_states(mock_docker_config) + assert result == {"svc1": "running"} + + +@pytest.mark.medium +@responses.activate +def test_fetch_service_states_multiple_services(mock_docker_config): + """Multiple services are independently mapped.""" + responses.add( + responses.GET, + "http://localhost:2375/tasks", + json=[ + {"ServiceID": "svc1", "Status": {"State": "running"}}, + {"ServiceID": "svc2", "Status": {"State": "starting"}}, + ], + status=200, + ) + result = _fetch_service_states(mock_docker_config) + assert result["svc1"] == "running" + assert result["svc2"] == "starting" + + +@pytest.mark.medium +def test_fetch_service_states_returns_empty_on_failure(mock_docker_config): + """Returns empty dict when Docker API is unreachable.""" + mock_docker_config.hostname = "" + result = _fetch_service_states(mock_docker_config) + assert result == {} + + +@pytest.mark.medium +@responses.activate +def test_fetch_service_states_returns_empty_for_non_swarm(mock_docker_config): + """Returns empty dict when Docker returns a dict (not swarm manager).""" + responses.add( + responses.GET, + "http://localhost:2375/tasks", + json={"message": "This node is not a swarm manager."}, + status=503, + ) + result = _fetch_service_states(mock_docker_config) + assert result == {} + + +# =========================================================================== +# _CachedDockerState TTL cache tests +# =========================================================================== + + +@pytest.mark.medium +def test_cached_docker_state_returns_cached_result_within_ttl(mock_docker_config): + """_CachedDockerState returns cached state without re-fetching within TTL.""" + # Reset cache state + _CachedDockerState._container_states = {"id1": "running"} + _CachedDockerState._container_ts = float("inf") # Never expires + + with patch( + "docker_challenges.functions.general._fetch_container_states" + ) as mock_fetch: + result = get_container_states(mock_docker_config) + + mock_fetch.assert_not_called() + assert result == {"id1": "running"} + + # Cleanup + _CachedDockerState._container_ts = 0.0 + _CachedDockerState._container_states = {} + + +@pytest.mark.medium +def test_cached_docker_state_fetches_fresh_after_ttl(mock_docker_config): + """_CachedDockerState re-fetches when cache is expired (ts=0).""" + _CachedDockerState._container_states = {} + _CachedDockerState._container_ts = 0.0 # Expired immediately + + fresh_data = {"id2": "starting"} + + with patch( + "docker_challenges.functions.general._fetch_container_states", + return_value=fresh_data, + ) as mock_fetch: + result = get_container_states(mock_docker_config) + + mock_fetch.assert_called_once_with(mock_docker_config) + assert result == fresh_data + + # Cleanup + _CachedDockerState._container_ts = 0.0 + _CachedDockerState._container_states = {} + + +@pytest.mark.medium +def test_cached_service_state_returns_cached_result_within_ttl(mock_docker_config): + """_CachedDockerState returns cached service state without re-fetching within TTL.""" + _CachedDockerState._service_states = {"svc1": "running"} + _CachedDockerState._service_ts = float("inf") + + with patch( + "docker_challenges.functions.general._fetch_service_states" + ) as mock_fetch: + result = get_service_states(mock_docker_config) + + mock_fetch.assert_not_called() + assert result == {"svc1": "running"} + + # Cleanup + _CachedDockerState._service_ts = 0.0 + _CachedDockerState._service_states = {} + + +# =========================================================================== +# DockerStatus.get() endpoint behaviour +# =========================================================================== + + +class TestDockerStatusHealthGate: + """Integration-style tests for DockerStatus.get() health check logic.""" + + def _make_tracker_entry( + self, instance_id: str, challenge_id: int = 1, healthy: bool = False + ) -> MagicMock: + entry = MagicMock() + entry.id = 1 + entry.instance_id = instance_id + entry.challenge_id = challenge_id + entry.healthy = healthy + entry.team_id = None + entry.user_id = "1" + entry.docker_image = "nginx:latest" + entry.timestamp = 1000 + entry.revert_time = 1300 + entry.ports = "30001/tcp->80/tcp" + return entry + + @pytest.mark.medium + @patch("docker_challenges.api.api.db") + @patch("docker_challenges.api.api.get_service_states") + @patch("docker_challenges.api.api.get_container_states") + @patch("docker_challenges.api.api._is_service_challenge") + @patch("docker_challenges.api.api.DockerChallengeTracker") + @patch("docker_challenges.api.api.DockerConfig") + @patch("docker_challenges.api.api.is_teams_mode") + @patch("docker_challenges.api.api.get_current_user") + def test_healthy_entry_returns_running_status( + self, + mock_get_user, + mock_is_teams, + mock_config, + mock_tracker, + mock_is_service, + mock_container_states, + mock_service_states, + mock_db, + ): + """Tracker entry with healthy=True returns status='running' with ports and host.""" + from docker_challenges.api.api import DockerStatus + + mock_is_teams.return_value = False + mock_get_user.return_value = MagicMock(id="1") + mock_docker = MagicMock() + mock_docker.hostname = "docker.host:2376" + mock_config.query.filter_by.return_value.first.return_value = mock_docker + + entry = self._make_tracker_entry("cont_abc", healthy=True) + mock_tracker.query.filter_by.return_value.__iter__ = MagicMock(return_value=iter([entry])) + + api = DockerStatus() + result = api.get() + + assert result["success"] is True + assert len(result["data"]) == 1 + item = result["data"][0] + assert item["status"] == "running" + assert "ports" in item + assert "host" in item + + @pytest.mark.medium + @patch("docker_challenges.api.api.db") + @patch("docker_challenges.api.api.get_service_states") + @patch("docker_challenges.api.api.get_container_states") + @patch("docker_challenges.api.api._is_service_challenge") + @patch("docker_challenges.api.api.DockerChallengeTracker") + @patch("docker_challenges.api.api.DockerConfig") + @patch("docker_challenges.api.api.is_teams_mode") + @patch("docker_challenges.api.api.get_current_user") + def test_unhealthy_entry_starting_returns_starting_without_ports( + self, + mock_get_user, + mock_is_teams, + mock_config, + mock_tracker, + mock_is_service, + mock_container_states, + mock_service_states, + mock_db, + ): + """Unhealthy entry with Docker status 'starting' returns status='starting' without ports.""" + from docker_challenges.api.api import DockerStatus + + mock_is_teams.return_value = False + mock_get_user.return_value = MagicMock(id="1") + mock_docker = MagicMock() + mock_docker.hostname = "docker.host:2376" + mock_config.query.filter_by.return_value.first.return_value = mock_docker + + entry = self._make_tracker_entry("cont_abc", healthy=False) + mock_tracker.query.filter_by.return_value.__iter__ = MagicMock(return_value=iter([entry])) + + mock_is_service.return_value = False + mock_container_states.return_value = {"cont_abc": "starting"} + mock_service_states.return_value = {} + + api = DockerStatus() + result = api.get() + + assert result["success"] is True + assert len(result["data"]) == 1 + item = result["data"][0] + assert item["status"] == "starting" + assert "ports" not in item + assert "host" not in item + # healthy flag should NOT have been updated + assert entry.healthy is False + + @pytest.mark.medium + @patch("docker_challenges.api.api.db") + @patch("docker_challenges.api.api.get_service_states") + @patch("docker_challenges.api.api.get_container_states") + @patch("docker_challenges.api.api._is_service_challenge") + @patch("docker_challenges.api.api.DockerChallengeTracker") + @patch("docker_challenges.api.api.DockerConfig") + @patch("docker_challenges.api.api.is_teams_mode") + @patch("docker_challenges.api.api.get_current_user") + def test_unhealthy_entry_transitions_to_running( + self, + mock_get_user, + mock_is_teams, + mock_config, + mock_tracker, + mock_is_service, + mock_container_states, + mock_service_states, + mock_db, + ): + """Unhealthy entry with Docker status 'running' transitions to healthy=True.""" + from docker_challenges.api.api import DockerStatus + + mock_is_teams.return_value = False + mock_get_user.return_value = MagicMock(id="1") + mock_docker = MagicMock() + mock_docker.hostname = "docker.host:2376" + mock_config.query.filter_by.return_value.first.return_value = mock_docker + + entry = self._make_tracker_entry("cont_abc", healthy=False) + mock_tracker.query.filter_by.return_value.__iter__ = MagicMock(return_value=iter([entry])) + + mock_is_service.return_value = False + mock_container_states.return_value = {"cont_abc": "running"} + mock_service_states.return_value = {} + + api = DockerStatus() + result = api.get() + + assert result["success"] is True + assert len(result["data"]) == 1 + item = result["data"][0] + assert item["status"] == "running" + assert "ports" in item + assert "host" in item + # healthy flag should have been updated + assert entry.healthy is True + mock_db.session.commit.assert_called_once() + + @pytest.mark.medium + @patch("docker_challenges.api.api.db") + @patch("docker_challenges.api.api.get_service_states") + @patch("docker_challenges.api.api.get_container_states") + @patch("docker_challenges.api.api._is_service_challenge") + @patch("docker_challenges.api.api.DockerChallengeTracker") + @patch("docker_challenges.api.api.DockerConfig") + @patch("docker_challenges.api.api.is_teams_mode") + @patch("docker_challenges.api.api.get_current_user") + def test_dead_container_cleans_up_tracker_entry( + self, + mock_get_user, + mock_is_teams, + mock_config, + mock_tracker, + mock_is_service, + mock_container_states, + mock_service_states, + mock_db, + ): + """Unhealthy entry not found in Docker states removes tracker entry.""" + from docker_challenges.api.api import DockerStatus + + mock_is_teams.return_value = False + mock_get_user.return_value = MagicMock(id="1") + mock_docker = MagicMock() + mock_docker.hostname = "docker.host:2376" + mock_config.query.filter_by.return_value.first.return_value = mock_docker + + entry = self._make_tracker_entry("cont_dead", healthy=False) + mock_tracker.query.filter_by.return_value.__iter__ = MagicMock(return_value=iter([entry])) + + mock_is_service.return_value = False + mock_container_states.return_value = {} # Container not found + mock_service_states.return_value = {} + + api = DockerStatus() + result = api.get() + + assert result["success"] is True + # Dead container should NOT appear in response data + assert len(result["data"]) == 0 + # Tracker entry should be deleted + mock_tracker.query.filter_by.assert_called_with(id=entry.id) + mock_tracker.query.filter_by.return_value.delete.assert_called_once() + mock_db.session.commit.assert_called_once() + + @pytest.mark.medium + @patch("docker_challenges.api.api.db") + @patch("docker_challenges.api.api.get_service_states") + @patch("docker_challenges.api.api.get_container_states") + @patch("docker_challenges.api.api._is_service_challenge") + @patch("docker_challenges.api.api.DockerChallengeTracker") + @patch("docker_challenges.api.api.DockerConfig") + @patch("docker_challenges.api.api.is_teams_mode") + @patch("docker_challenges.api.api.get_current_user") + def test_stopped_container_cleans_up_tracker_entry( + self, + mock_get_user, + mock_is_teams, + mock_config, + mock_tracker, + mock_is_service, + mock_container_states, + mock_service_states, + mock_db, + ): + """Unhealthy entry with Docker status 'stopped' removes tracker entry.""" + from docker_challenges.api.api import DockerStatus + + mock_is_teams.return_value = False + mock_get_user.return_value = MagicMock(id="1") + mock_docker = MagicMock() + mock_docker.hostname = "docker.host:2376" + mock_config.query.filter_by.return_value.first.return_value = mock_docker + + entry = self._make_tracker_entry("cont_stopped", healthy=False) + mock_tracker.query.filter_by.return_value.__iter__ = MagicMock(return_value=iter([entry])) + + mock_is_service.return_value = False + mock_container_states.return_value = {"cont_stopped": "stopped"} + mock_service_states.return_value = {} + + api = DockerStatus() + result = api.get() + + assert result["success"] is True + assert len(result["data"]) == 0 + mock_db.session.commit.assert_called_once() + + @pytest.mark.medium + @patch("docker_challenges.api.api.db") + @patch("docker_challenges.api.api.get_service_states") + @patch("docker_challenges.api.api.get_container_states") + @patch("docker_challenges.api.api._is_service_challenge") + @patch("docker_challenges.api.api.DockerChallengeTracker") + @patch("docker_challenges.api.api.DockerConfig") + @patch("docker_challenges.api.api.is_teams_mode") + @patch("docker_challenges.api.api.get_current_user") + def test_service_challenge_uses_service_states( + self, + mock_get_user, + mock_is_teams, + mock_config, + mock_tracker, + mock_is_service, + mock_container_states, + mock_service_states, + mock_db, + ): + """Service challenges look up states in service_states dict, not container_states.""" + from docker_challenges.api.api import DockerStatus + + mock_is_teams.return_value = False + mock_get_user.return_value = MagicMock(id="1") + mock_docker = MagicMock() + mock_docker.hostname = "docker.host:2376" + mock_config.query.filter_by.return_value.first.return_value = mock_docker + + entry = self._make_tracker_entry("svc_abc", challenge_id=5, healthy=False) + mock_tracker.query.filter_by.return_value.__iter__ = MagicMock(return_value=iter([entry])) + + mock_is_service.return_value = True # This is a service challenge + mock_container_states.return_value = {} # Not in container states + mock_service_states.return_value = {"svc_abc": "running"} + + api = DockerStatus() + result = api.get() + + assert result["success"] is True + assert len(result["data"]) == 1 + assert result["data"][0]["status"] == "running" + assert entry.healthy is True From 9b6dbae915f2bed52b1519be3225e99c42fa7aa0 Mon Sep 17 00:00:00 2001 From: Ben Lucas Date: Mon, 2 Mar 2026 17:44:48 -0700 Subject: [PATCH 2/6] fix(health): use engine.begin() for SQLAlchemy 1.4 compatibility in migration --- docker_challenges/__init__.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docker_challenges/__init__.py b/docker_challenges/__init__.py index 42b94e4..20bb47f 100644 --- a/docker_challenges/__init__.py +++ b/docker_challenges/__init__.py @@ -295,14 +295,13 @@ def _ensure_healthy_column(app) -> None: inspector = inspect(app.db.engine) columns = [col["name"] for col in inspector.get_columns("docker_challenge_tracker")] if "healthy" not in columns: - with app.db.engine.connect() as conn: + with app.db.engine.begin() as conn: conn.execute( text( "ALTER TABLE docker_challenge_tracker " "ADD COLUMN healthy BOOLEAN NOT NULL DEFAULT 1" ) ) - conn.commit() logging.info("docker_challenge_tracker: added 'healthy' column") except Exception as err: logging.warning("Could not ensure 'healthy' column on docker_challenge_tracker: %s", err) From 8aa440a9f968e07284e849572ad2028dd560230e Mon Sep 17 00:00:00 2001 From: Ben Lucas Date: Mon, 2 Mar 2026 17:51:00 -0700 Subject: [PATCH 3/6] fix(health): treat newly created/not-found containers as starting, not dead MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Container state mapping was too aggressive: - 'created' state (Docker container just started) mapped to 'stopped' → deletion - '' (not found in states dict) also triggered deletion Fixes: - _fetch_container_states: map 'created'/'restarting' to 'starting'; only 'exited'/'dead'/'removing'/'paused' map to 'stopped' - _process_unhealthy_entry: treat '' (not found) as 'starting' — container may simply not be visible in Docker yet immediately after creation - Tests updated + new test for not-found-shows-starting behaviour --- docker_challenges/api/api.py | 6 ++- docker_challenges/functions/general.py | 5 ++- tests/test_container_health.py | 62 +++++++++++++++++++++++++- 3 files changed, 69 insertions(+), 4 deletions(-) diff --git a/docker_challenges/api/api.py b/docker_challenges/api/api.py index 2092257..73168aa 100644 --- a/docker_challenges/api/api.py +++ b/docker_challenges/api/api.py @@ -439,10 +439,12 @@ def _process_unhealthy_entry( entry.healthy = True data.append(DockerStatus._build_entry(entry, docker, status="running")) return True - if docker_status == "starting": + if docker_status in ("starting", ""): + # "starting": healthcheck not yet passed + # "": not yet visible in Docker (just created) — treat as starting data.append(DockerStatus._build_entry(entry, docker, status="starting")) return False - # "stopped", "unhealthy", "" (not found), or unknown — clean up dead tracker entry + # "stopped", "unhealthy", or unknown explicit state — clean up dead tracker entry DockerChallengeTracker.query.filter_by(id=entry.id).delete() return True diff --git a/docker_challenges/functions/general.py b/docker_challenges/functions/general.py index b9f24ef..2d81ee7 100644 --- a/docker_challenges/functions/general.py +++ b/docker_challenges/functions/general.py @@ -530,8 +530,11 @@ def _fetch_container_states(docker: DockerConfig) -> dict[str, str]: state = container.get("State", "") status = container.get("Status", "") - if state != "running": + if state in ("exited", "dead", "removing", "paused"): result[container_id] = "stopped" + elif state != "running": + # "created", "restarting", or unknown — transient startup state + result[container_id] = "starting" elif "(unhealthy)" in status: result[container_id] = "unhealthy" elif "(health: starting)" in status: diff --git a/tests/test_container_health.py b/tests/test_container_health.py index d38db55..cfc21eb 100644 --- a/tests/test_container_health.py +++ b/tests/test_container_health.py @@ -98,6 +98,20 @@ def test_fetch_container_states_exited_container(mock_docker_config): assert result == {"abc123": "stopped"} +@pytest.mark.medium +@responses.activate +def test_fetch_container_states_created_container_is_starting(mock_docker_config): + """Container in 'created' state (just started) maps to 'starting', not 'stopped'.""" + responses.add( + responses.GET, + "http://localhost:2375/containers/json?all=1", + json=[{"Id": "abc123", "State": "created", "Status": "Created"}], + status=200, + ) + result = _fetch_container_states(mock_docker_config) + assert result == {"abc123": "starting"} + + @pytest.mark.medium @responses.activate def test_fetch_container_states_multiple_containers(mock_docker_config): @@ -480,7 +494,7 @@ def test_dead_container_cleans_up_tracker_entry( mock_tracker.query.filter_by.return_value.__iter__ = MagicMock(return_value=iter([entry])) mock_is_service.return_value = False - mock_container_states.return_value = {} # Container not found + mock_container_states.return_value = {"cont_dead": "stopped"} mock_service_states.return_value = {} api = DockerStatus() @@ -494,6 +508,52 @@ def test_dead_container_cleans_up_tracker_entry( mock_tracker.query.filter_by.return_value.delete.assert_called_once() mock_db.session.commit.assert_called_once() + @pytest.mark.medium + @patch("docker_challenges.api.api.db") + @patch("docker_challenges.api.api.get_service_states") + @patch("docker_challenges.api.api.get_container_states") + @patch("docker_challenges.api.api._is_service_challenge") + @patch("docker_challenges.api.api.DockerChallengeTracker") + @patch("docker_challenges.api.api.DockerConfig") + @patch("docker_challenges.api.api.is_teams_mode") + @patch("docker_challenges.api.api.get_current_user") + def test_not_found_in_states_shows_starting( + self, + mock_get_user, + mock_is_teams, + mock_config, + mock_tracker, + mock_is_service, + mock_container_states, + mock_service_states, + mock_db, + ): + """Container not in states dict (just created) shows as 'starting', not deleted.""" + from docker_challenges.api.api import DockerStatus + + mock_is_teams.return_value = False + mock_get_user.return_value = MagicMock(id="1") + mock_docker = MagicMock() + mock_docker.hostname = "docker.host:2376" + mock_config.query.filter_by.return_value.first.return_value = mock_docker + + entry = self._make_tracker_entry("cont_new", healthy=False) + mock_tracker.query.filter_by.return_value.__iter__ = MagicMock(return_value=iter([entry])) + + mock_is_service.return_value = False + mock_container_states.return_value = {} # Not yet visible in Docker + mock_service_states.return_value = {} + + api = DockerStatus() + result = api.get() + + assert result["success"] is True + assert len(result["data"]) == 1 + assert result["data"][0]["status"] == "starting" + assert "ports" not in result["data"][0] + # Entry should NOT be deleted + mock_tracker.query.filter_by.return_value.delete.assert_not_called() + @pytest.mark.medium @patch("docker_challenges.api.api.db") @patch("docker_challenges.api.api.get_service_states") From b615ce73ba70cb3bd0a06ddc57b4a86ae0c54ea1 Mon Sep 17 00:00:00 2001 From: Ben Lucas Date: Mon, 2 Mar 2026 18:07:19 -0700 Subject: [PATCH 4/6] fix(services): recover existing service on 409 name conflict MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a service exists on Docker but the tracker entry is gone (e.g. after CTFd restart with in-flight containers), creation returns 409. Previously this silently returned (None, None) → 500 to the user. Now mirrors the find_existing() pattern used by create_container(): on 409, query the existing service by name and return its ID and actual port configuration so the tracker entry can be recreated. --- docker_challenges/functions/services.py | 41 +++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/docker_challenges/functions/services.py b/docker_challenges/functions/services.py index 5374dec..55abf6d 100644 --- a/docker_challenges/functions/services.py +++ b/docker_challenges/functions/services.py @@ -100,6 +100,39 @@ def _build_secrets_list(challenge: DockerServiceChallenge, docker: DockerConfig) return secrets_list +def find_existing_service(docker: DockerConfig, service_name: str) -> tuple[str | None, str | None]: + """Find an existing Docker Swarm service by name. + + Used to recover a service ID when creation returns 409 (name conflict), + matching the same pattern as find_existing() for containers. + + Returns: + Tuple of (service_id, service_data_json) on success, or (None, None) if not found. + service_data_json is formatted with EndpointSpec.Ports for port extraction. + """ + r = do_request(docker, f'/services?filters={{"name":["{service_name}"]}}') + if not r: + return None, None + + try: + services = r.json() + except Exception: + return None, None + + if not isinstance(services, list): + return None, None + + for service in services: + spec = service.get("Spec", {}) + if spec.get("Name") == service_name: + service_id = service.get("ID") + ports = spec.get("EndpointSpec", {}).get("Ports", []) + data = json.dumps({"EndpointSpec": {"Ports": ports}}) + return service_id, data + + return None, None + + def create_service( docker: DockerConfig, challenge_id: int, image: str, team: str, portbl: list ) -> tuple[str | None, str | None]: @@ -146,6 +179,14 @@ def create_service( if not r: return None, None + if r.status_code == 409: + # Service name conflict — tracker entry may have been lost while the service survived. + # Recover the existing service ID (mirrors find_existing() pattern for containers). + logging.warning( + "Service name conflict for %s — recovering existing service ID", service_name + ) + return find_existing_service(docker, service_name) + instance_id = r.json().get("ID") if not instance_id: logging.error("Unable to create service %s with image %s", service_name, image) From 823610d27cb339a5cbffecb92c4dccc42732af75 Mon Sep 17 00:00:00 2001 From: Ben Lucas Date: Mon, 2 Mar 2026 18:13:38 -0700 Subject: [PATCH 5/6] fix(services): use 'is None' check instead of falsiness for Docker responses requests.Response.__bool__ returns False for any 4xx response, so 'if not r:' was short-circuiting the 409 conflict-recovery code before it could fire. Both create_service and create_container were affected. Change 'if not r:' to 'if r is None:' so only true network failures (where do_request returns None) trigger the early return, while 409 responses correctly fall through to the name-conflict recovery path. --- docker_challenges/functions/containers.py | 2 +- docker_challenges/functions/services.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docker_challenges/functions/containers.py b/docker_challenges/functions/containers.py index 2dee2bd..bb98e40 100644 --- a/docker_challenges/functions/containers.py +++ b/docker_challenges/functions/containers.py @@ -104,7 +104,7 @@ def create_container( method="POST", data=data, ) - if not r: + if r is None: return None, None instance_id = find_existing(docker, container_name) if r.status_code == 409 else r.json()["Id"] if instance_id is None: diff --git a/docker_challenges/functions/services.py b/docker_challenges/functions/services.py index 55abf6d..3076f0b 100644 --- a/docker_challenges/functions/services.py +++ b/docker_challenges/functions/services.py @@ -176,7 +176,7 @@ def create_service( # Create service and handle response r = do_request(docker, url="/services/create", method="POST", data=data) - if not r: + if r is None: return None, None if r.status_code == 409: From dcfa67d5e0fbd88cd13e9070461ed2b3109a6a40 Mon Sep 17 00:00:00 2001 From: Ben Lucas Date: Mon, 2 Mar 2026 18:17:11 -0700 Subject: [PATCH 6/6] feat(admin): show container health status badge on docker status page MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a Status column to the admin docker status table showing 'Running' (green) or 'Starting' (yellow) based on the healthy flag. No backend changes required — healthy is already in the tracker model. --- docker_challenges/templates/admin_docker_status.html | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/docker_challenges/templates/admin_docker_status.html b/docker_challenges/templates/admin_docker_status.html index b331817..47f9698 100644 --- a/docker_challenges/templates/admin_docker_status.html +++ b/docker_challenges/templates/admin_docker_status.html @@ -39,6 +39,7 @@

Docker Status

Instance ID + Status Revoke @@ -57,6 +58,13 @@

Docker Status

{{docker.instance_id | truncate(15)}} + + {% if docker.healthy %} + Running + {% else %} + Starting + {% endif %} +