Skip to content

Commit 0e7af21

Browse files
committed
Merge #228: Release 34: Fix deadlocks
Approved-by: Qqwy Priority: Normal Auto-deploy: false
2 parents 703e255 + bcb5472 commit 0e7af21

File tree

9 files changed

+196
-44
lines changed

9 files changed

+196
-44
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

justfile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ test-integration *TEST_ARGS: build-bin build-python
4747
cd libs/opsqueue_python
4848
source "./.setup_local_venv.sh"
4949

50-
pytest --color=yes {{TEST_ARGS}}
50+
timeout 30 pytest --color=yes {{TEST_ARGS}}
5151

5252
# Python integration test suite, using artefacts built through Nix. Args are forwarded to pytest
5353
[group('nix')]
@@ -61,7 +61,7 @@ nix-test-integration *TEST_ARGS:
6161
export OPSQUEUE_VIA_NIX=true
6262
export RUST_LOG="opsqueue=debug"
6363

64-
pytest --color=yes {{TEST_ARGS}}
64+
timeout 30 pytest --color=yes {{TEST_ARGS}}
6565

6666
# Run all linters, fast and slow
6767
[group('lint')]

libs/opsqueue_python/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ thiserror = "1.0.65"
3232
# Python FFI:
3333
pyo3 = { version = "0.23.4", features = ["chrono"] }
3434
pyo3-async-runtimes = { version = "0.23.0", features = ["tokio-runtime", "unstable-streams"] }
35+
once_cell = "1.21.3" # Only currently used for `unsync::OnceCell` as part of PyO3 async helpers.
3536

3637
# Logging/tracing:
3738
pyo3-log = "0.12.1"

libs/opsqueue_python/pyproject.toml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,10 +52,15 @@ test = [
5252
"pytest==8.3.3",
5353
"pytest-random-order==1.1.1",
5454
"pytest-parallel==0.1.1",
55+
"pytest-timeout==2.4.0",
5556
"py==1.11.0", # Needs to be manually specified because of this issue: https://github.com/kevlened/pytest-parallel/issues/118
5657
]
5758

5859
[tool.pytest.ini_options]
5960
# We ensure tests never rely on global state,
6061
# by running them in a random order, and in parallel:
6162
addopts = "--random-order --workers=auto"
63+
# Individual tests should be very fast. They should never take multiple seconds
64+
# If after 20sec (accomodating for a toaster-like PC) there is no progress,
65+
# assume a deadlock
66+
timeout=20
Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
//! Helpers to bridge async Rust and PyO3's flavour of async Python
2+
//!
3+
use pyo3::{Bound, IntoPyObject, PyAny, PyResult, Python};
4+
use pyo3_async_runtimes::TaskLocals;
5+
use std::{
6+
future::Future,
7+
pin::{pin, Pin},
8+
task::{Context, Poll},
9+
};
10+
11+
/// Unlock the GIL across `.await` points
12+
///
13+
/// Essentially `py.allow_threads` but for async code.
14+
///
15+
/// Based on https://pyo3.rs/v0.25.1/async-await.html#release-the-gil-across-await
16+
pub struct AsyncAllowThreads<F>(F);
17+
18+
pub fn async_allow_threads<F>(fut: F) -> AsyncAllowThreads<F>
19+
where
20+
F: Future,
21+
{
22+
AsyncAllowThreads(fut)
23+
}
24+
25+
impl<F> Future for AsyncAllowThreads<F>
26+
where
27+
F: Future + Unpin + Send,
28+
F::Output: Send,
29+
{
30+
type Output = F::Output;
31+
32+
fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
33+
let waker = cx.waker();
34+
Python::with_gil(|py| {
35+
py.allow_threads(|| pin!(&mut self.0).poll(&mut Context::from_waker(waker)))
36+
})
37+
}
38+
}
39+
40+
/// Version of `future_into_py` that uses the `TokioRuntimeThatIsInScope`
41+
pub fn future_into_py<T, F>(py: Python<'_>, fut: F) -> PyResult<Bound<'_, PyAny>>
42+
where
43+
F: Future<Output = PyResult<T>> + Send + 'static,
44+
T: for<'py> IntoPyObject<'py>,
45+
{
46+
pyo3_async_runtimes::generic::future_into_py::<TokioRuntimeThatIsInScope, F, T>(py, fut)
47+
}
48+
49+
/// An alternative runtime for `pyo3_async_runtimes`
50+
/// since `pyo3_async_runtimes::tokio` runs its _own_ Tokio runtime
51+
/// rather than using whatever is in scope or allowing the user to pass the specific runtime.
52+
///
53+
/// How this runtime works, is to use whatever Tokio runtime was entered using `runtime.enter()` beforehand
54+
struct TokioRuntimeThatIsInScope();
55+
56+
use once_cell::unsync::OnceCell as UnsyncOnceCell;
57+
58+
tokio::task_local! {
59+
static TASK_LOCALS: UnsyncOnceCell<TaskLocals>;
60+
}
61+
62+
impl pyo3_async_runtimes::generic::Runtime for TokioRuntimeThatIsInScope {
63+
type JoinError = tokio::task::JoinError;
64+
type JoinHandle = tokio::task::JoinHandle<()>;
65+
66+
fn spawn<F>(fut: F) -> Self::JoinHandle
67+
where
68+
F: std::future::Future<Output = ()> + Send + 'static,
69+
{
70+
tokio::task::spawn(async move {
71+
fut.await;
72+
})
73+
}
74+
}
75+
76+
impl pyo3_async_runtimes::generic::ContextExt for TokioRuntimeThatIsInScope {
77+
fn scope<F, R>(
78+
locals: TaskLocals,
79+
fut: F,
80+
) -> std::pin::Pin<Box<dyn std::future::Future<Output = R> + Send>>
81+
where
82+
F: std::future::Future<Output = R> + Send + 'static,
83+
{
84+
let cell = UnsyncOnceCell::new();
85+
cell.set(locals).unwrap();
86+
87+
Box::pin(TASK_LOCALS.scope(cell, fut))
88+
}
89+
90+
fn get_task_locals() -> Option<TaskLocals> {
91+
TASK_LOCALS
92+
.try_with(|c| {
93+
c.get()
94+
.map(|locals| Python::with_gil(|py| locals.clone_ref(py)))
95+
})
96+
.unwrap_or_default()
97+
}
98+
}

libs/opsqueue_python/src/common.rs

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -443,10 +443,24 @@ pub async fn check_signals_in_background() -> FatalPythonException {
443443

444444
/// Sets up a Tokio runtime to use for a client.
445445
///
446-
/// Rather than the current-thread scheduler,
447-
/// we use a (single extra!) background thread,
448-
/// allowing us to keep (GIL-less) tasks alive in the background
449-
/// even when returning back to Python
446+
/// Note that we very intentionally use the multi-threaded scheduler
447+
/// but with a single thread.
448+
///
449+
/// We **cannot** use the current_thread scheduler,
450+
/// since that would result in the Python task scheduler (e.g. `asyncio`)
451+
/// and the Rust task scheduler (`Tokio`) to run on the same thread.
452+
/// This seems to work fine, until you end up with a Python future
453+
/// that depends on a Rust future completing or vice-versa:
454+
/// Since the task schedulers each have their own task queues,
455+
/// work co-operatively, and know nothing of each-other,
456+
/// they will not (nor can they) yield to the other.
457+
/// The result: deadlocks!
458+
///
459+
/// Therefore, we run the Tokio scheduler on a separate thread.
460+
/// Since switching between the 'Python scheduler thread' and the
461+
/// 'Tokio scheduler thread' is preemptive,
462+
/// the same problem now no longer occurs:
463+
/// Both schedulers are able to make forward progress (even on a 1-CPU machine).
450464
pub fn start_runtime() -> Arc<tokio::runtime::Runtime> {
451465
let runtime = tokio::runtime::Builder::new_multi_thread()
452466
.worker_threads(1)

libs/opsqueue_python/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
mod async_util;
12
pub mod common;
23
pub mod consumer;
34
pub mod errors;

libs/opsqueue_python/src/producer.rs

Lines changed: 61 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ use opsqueue::{
2323
use ux_serde::u63;
2424

2525
use crate::{
26+
async_util,
2627
common::{run_unless_interrupted, start_runtime, SubmissionId, SubmissionStatus},
2728
errors::{self, CError, CPyResult, FatalPythonException},
2829
};
@@ -217,17 +218,19 @@ impl ProducerClient {
217218
py.allow_threads(|| {
218219
let prefix = uuid::Uuid::now_v7().to_string();
219220
tracing::debug!("Uploading submission chunks to object store subfolder {prefix}...");
220-
let chunk_count = Python::with_gil(|py| {
221-
self.block_unless_interrupted(async {
222-
let chunk_contents = chunk_contents.bind(py);
223-
let stream = futures::stream::iter(chunk_contents)
224-
.map(|item| item.and_then(|item| item.extract()).map_err(Into::into));
221+
let chunk_count = self.block_unless_interrupted(async {
222+
let chunk_contents = std::iter::from_fn(move || {
223+
Python::with_gil(|py|
224+
chunk_contents.bind(py).clone().next()
225+
.map(|item| item.and_then(
226+
|item| item.extract()).map_err(Into::into)))
227+
});
228+
let stream = futures::stream::iter(chunk_contents);
225229
self.object_store_client
226230
.store_chunks(&prefix, ChunkType::Input, stream)
227231
.await
228232
.map_err(|e| CError(R(L(e))))
229-
})
230-
})?;
233+
})?;
231234
let chunk_count = chunk::ChunkIndex::from(chunk_count);
232235
tracing::debug!("Finished uploading to object store. {prefix} contains {chunk_count} chunks");
233236

@@ -360,15 +363,18 @@ impl ProducerClient {
360363
) -> PyResult<Bound<'p, PyAny>> {
361364
let me = self.clone();
362365
let _tokio_active_runtime_guard = me.runtime.enter();
363-
pyo3_async_runtimes::tokio::future_into_py(py, async move {
364-
match me.stream_completed_submission_chunks(submission_id).await {
365-
Ok(iter) => {
366-
let async_iter = PyChunksAsyncIter::from(iter);
367-
Ok(async_iter)
366+
async_util::future_into_py(
367+
py,
368+
async_util::async_allow_threads(Box::pin(async move {
369+
match me.stream_completed_submission_chunks(submission_id).await {
370+
Ok(iter) => {
371+
let async_iter = PyChunksAsyncIter::from(iter);
372+
Ok(async_iter)
373+
}
374+
Err(e) => PyResult::Err(e.into()),
368375
}
369-
Err(e) => PyResult::Err(e.into()),
370-
}
371-
})
376+
})),
377+
)
372378
}
373379
}
374380

@@ -462,7 +468,7 @@ pub type ChunksStream = BoxStream<'static, CPyResult<Vec<u8>, ChunkRetrievalErro
462468

463469
#[pyclass]
464470
pub struct PyChunksIter {
465-
stream: tokio::sync::Mutex<ChunksStream>,
471+
stream: Arc<tokio::sync::Mutex<ChunksStream>>,
466472
runtime: Arc<tokio::runtime::Runtime>,
467473
}
468474

@@ -475,7 +481,7 @@ impl PyChunksIter {
475481
.map_err(CError)
476482
.boxed();
477483
Self {
478-
stream: tokio::sync::Mutex::new(stream),
484+
stream: Arc::new(tokio::sync::Mutex::new(stream)),
479485
runtime: client.runtime.clone(),
480486
}
481487
}
@@ -487,11 +493,21 @@ impl PyChunksIter {
487493
slf
488494
}
489495

490-
fn __next__(mut slf: PyRefMut<'_, Self>) -> Option<CPyResult<Vec<u8>, ChunkRetrievalError>> {
491-
let me = &mut *slf;
492-
let runtime = &mut me.runtime;
493-
let stream = &mut me.stream;
494-
runtime.block_on(async { stream.lock().await.next().await })
496+
fn __next__(&self, py: Python<'_>) -> Option<CPyResult<Vec<u8>, ChunkRetrievalError>> {
497+
// The only time we need the GIL is when turning the result back.
498+
// By unlocking here, we reduce the chance of deadlocks.
499+
py.allow_threads(move || {
500+
let runtime = self.runtime.clone();
501+
let stream = self.stream.clone();
502+
runtime.block_on(async {
503+
// We lock the stream in a separate Tokio task
504+
// that explicitly runs on the runtime thread rather than on the main Python thread.
505+
// This reduces the possibility for deadlocks even further.
506+
tokio::task::spawn(async move { stream.lock().await.next().await })
507+
.await
508+
.expect("Top-level spawn to succeed")
509+
})
510+
})
495511
}
496512

497513
fn __aiter__(slf: PyRef<'_, Self>) -> PyRef<'_, Self> {
@@ -508,7 +524,7 @@ pub struct PyChunksAsyncIter {
508524
impl From<PyChunksIter> for PyChunksAsyncIter {
509525
fn from(iter: PyChunksIter) -> Self {
510526
Self {
511-
stream: Arc::new(iter.stream),
527+
stream: iter.stream,
512528
runtime: iter.runtime,
513529
}
514530
}
@@ -520,16 +536,27 @@ impl PyChunksAsyncIter {
520536
slf
521537
}
522538

523-
fn __anext__(slf: PyRef<'_, Self>) -> PyResult<Bound<'_, PyAny>> {
524-
let _tokio_active_runtime_guard = slf.runtime.enter();
525-
let stream = slf.stream.clone();
526-
pyo3_async_runtimes::tokio::future_into_py(slf.py(), async move {
527-
let res = stream.lock().await.next().await;
528-
match res {
529-
None => Err(PyStopAsyncIteration::new_err(())),
530-
Some(Ok(val)) => Ok(Some(val)),
531-
Some(Err(e)) => Err(e.into()),
532-
}
533-
})
539+
fn __anext__<'py>(&self, py: Python<'py>) -> PyResult<Bound<'py, PyAny>> {
540+
let stream = self.stream.clone();
541+
let _tokio_active_runtime_guard = self.runtime.enter();
542+
543+
async_util::future_into_py(
544+
py,
545+
// The only time we need the GIL is when turning the result into Python datatypes.
546+
// By unlocking here, we reduce the chance of deadlocks.
547+
async_util::async_allow_threads(Box::pin(async move {
548+
// We lock the stream in a separate Tokio task
549+
// that explicitly runs on the runtime thread rather than on the main Python thread.
550+
// This reduces the possibility for deadlocks even further.
551+
let res = tokio::task::spawn(async move { stream.lock().await.next().await })
552+
.await
553+
.expect("Top-level spawn to succeed");
554+
match res {
555+
None => Err(PyStopAsyncIteration::new_err(())),
556+
Some(Ok(val)) => Ok(Some(val)),
557+
Some(Err(e)) => Err(e.into()),
558+
}
559+
})),
560+
)
534561
}
535562
}

libs/opsqueue_python/tests/conftest.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@
1919
# print("A")
2020
# multiprocessing.set_start_method('forkserver')
2121

22+
PROJECT_ROOT = Path(__file__).parents[3]
23+
2224

2325
@dataclass
2426
class OpsqueueProcess:
@@ -34,8 +36,12 @@ def opsqueue_bin_location() -> Path:
3436
)
3537
return Path(deriv_path) / "bin" / "opsqueue"
3638
else:
37-
subprocess.run(["cargo", "build", "--quiet", "--bin", "opsqueue"])
38-
return Path(".", "target", "debug", "opsqueue")
39+
subprocess.run(
40+
["cargo", "build", "--quiet", "--bin", "opsqueue"],
41+
cwd=PROJECT_ROOT,
42+
check=True,
43+
)
44+
return PROJECT_ROOT / Path("target", "debug", "opsqueue")
3945

4046

4147
@pytest.fixture
@@ -62,12 +68,11 @@ def opsqueue_service(
6268
"--database-filename",
6369
temp_dbname,
6470
]
65-
cwd = "../../"
6671
env = os.environ.copy() # We copy the env so e.g. RUST_LOG and other env vars are propagated from outside of the invocation of pytest
6772
if env.get("RUST_LOG") is None:
6873
env["RUST_LOG"] = "off"
6974

70-
with subprocess.Popen(command, cwd=cwd, env=env) as process:
75+
with subprocess.Popen(command, cwd=PROJECT_ROOT, env=env) as process:
7176
try:
7277
wrapper = OpsqueueProcess(port=port, process=process)
7378
yield wrapper

0 commit comments

Comments
 (0)