Skip to content

Commit f6797df

Browse files
committed
fixup! Ensure that bad HTTP status codes result in the producer retrying
1 parent 712cb94 commit f6797df

File tree

1 file changed

+23
-5
lines changed

1 file changed

+23
-5
lines changed

opsqueue/src/producer/client.rs

Lines changed: 23 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ use std::time::Duration;
33
use backon::BackoffBuilder;
44
use backon::FibonacciBuilder;
55
use backon::Retryable;
6+
use http::StatusCode;
67

78
use crate::common::submission::{SubmissionId, SubmissionStatus};
89
use crate::tracing::CarrierMap;
@@ -162,15 +163,18 @@ impl Client {
162163

163164
/// Get the server's version from the `/version` endpoint.
164165
///
165-
/// The result will be the value of [`VERSION_CARGO_SEMVER`][crate::VERSION_CARGO_SEMVER]
166+
/// A successful result will be the value of [`VERSION_CARGO_SEMVER`][crate::VERSION_CARGO_SEMVER]
166167
/// prefixed with a "v", for example `v0.30.5`.
168+
///
169+
/// Upon connection failure, this will not attempt to do any retrying.
167170
pub async fn server_version(&self) -> Result<String, InternalProducerClientError> {
168171
let base_url = &self.base_url;
169172
let resp = self
170173
.http_client
171174
.get(format!("{base_url}/version"))
172175
.send()
173-
.await?;
176+
.await?
177+
.error_for_status()?;
174178
let text = resp.text().await?;
175179
Ok(text)
176180
}
@@ -191,10 +195,24 @@ impl InternalProducerClientError {
191195
// So even cleaner would be a tiny retry loop for this special case.
192196
// However, we certainly **do not** want to wait multiple minutes before returning.
193197
Self::ResponseDecodingError(_) => false,
194-
// Reqwest doesn't make this very easy as it has a single error typed used for _everything_
195-
// Maybe a different HTTP client library is nicer in this regard?
198+
// Reqwest doesn't make this very easy as it has a single error typed used for _everything_.
196199
Self::HTTPClientError(inner) => {
197-
inner.is_status() || inner.is_connect() || inner.is_timeout() || inner.is_decode()
200+
// Failures in which a connection could not be established are ephemeral,
201+
// as these can be caused by network failures, so we want to retry:
202+
if inner.is_connect() || inner.is_timeout() || inner.is_decode() {
203+
true
204+
} else if inner.is_status() {
205+
// Failures where the server returns a 5xx status code might indicate the server is (temporarily!) unhealthy,
206+
// or in the process of a restart.
207+
// Any other status is considered a permanent failure however
208+
inner
209+
.status()
210+
.map(|status| status.is_server_error())
211+
.unwrap_or(false)
212+
} else {
213+
// Anything else is a permanent failure.
214+
false
215+
}
198216
}
199217
}
200218
}

0 commit comments

Comments
 (0)