From 9dff3196eec5d7c16cd98eec434474fa0d99ac36 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Ma=C5=9Blanka?= Date: Mon, 28 Jul 2025 14:08:31 +0200 Subject: [PATCH 1/2] tests/kgo: handle errors when polling for verifier status MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It sometimes happens that the verifier status request returns an error due to a transient network issue or the application being slow to start. In this case, the test failed completely because the producer status wasn't reported even if the producer finished successfully. Added retry-based error handling to the status loop to prevent those errors from failing the tests. Signed-off-by: Michał Maślanka --- tests/rptest/services/kgo_verifier_services.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/tests/rptest/services/kgo_verifier_services.py b/tests/rptest/services/kgo_verifier_services.py index ccf28d9eda7e5..cc3ac6650da45 100644 --- a/tests/rptest/services/kgo_verifier_services.py +++ b/tests/rptest/services/kgo_verifier_services.py @@ -15,11 +15,13 @@ import threading import requests from typing import Any, Dict, Optional +from requests.adapters import HTTPAdapter from ducktape.cluster.cluster import ClusterNode from ducktape.services.service import Service from ducktape.utils.util import wait_until from ducktape.cluster.remoteaccount import RemoteCommandError +from urllib3 import Retry from rptest.services.redpanda import RedpandaService @@ -442,11 +444,20 @@ def _ingest_status(self, worker_statuses): self._parent._status = reduced def poll_status(self): + retry_strategy = Retry(total=5, + connect=5, + read=5, + backoff_factor=0.3, + status=5, + allowed_methods=['GET'], + status_forcelist=[503, 504]) + session = requests.Session() + session.mount("http://", HTTPAdapter(max_retries=retry_strategy)) + while not self._stop_requested.is_set(): drop_out = self._shutdown_requested.is_set() - - r = 
requests.get(self._parent._remote_url(self._node, "status"), - timeout=5) + r = session.get(url=self._parent._remote_url(self._node, "status"), + timeout=5) r.raise_for_status() worker_statuses = r.json() self._ingest_status(worker_statuses) From c7237ba7dd973dd766e079bea90202ef0a6f2ba5 Mon Sep 17 00:00:00 2001 From: Nicolae Vartolomei Date: Wed, 30 Jul 2025 21:31:15 +0100 Subject: [PATCH 2/2] tests: upgrade kgo-verifier with startup race condition fix --- tests/docker/ducktape-deps/kgo-verifier | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/docker/ducktape-deps/kgo-verifier b/tests/docker/ducktape-deps/kgo-verifier index 22abdbf2428a5..2ac38032b66fb 100644 --- a/tests/docker/ducktape-deps/kgo-verifier +++ b/tests/docker/ducktape-deps/kgo-verifier @@ -2,6 +2,6 @@ set -e git -C /opt clone https://github.com/redpanda-data/kgo-verifier.git cd /opt/kgo-verifier -git reset --hard 319c69da3d7690e26d390261c4b331703b99260c +git reset --hard f97884cb0815d60eca41950f818d51c70b665890 go mod tidy make