From f8e90f5f36755c5b39bea4d8f928e6a8296a02cf Mon Sep 17 00:00:00 2001 From: b-pass Date: Fri, 12 Sep 2025 18:05:48 -0400 Subject: [PATCH 1/2] Use thread_local instead of thread_specific_storage for internals management thread_local is faster. --- include/pybind11/detail/internals.h | 32 ++++++++++++++++++----------- 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/include/pybind11/detail/internals.h b/include/pybind11/detail/internals.h index cd3afdfe36..712709e533 100644 --- a/include/pybind11/detail/internals.h +++ b/include/pybind11/detail/internals.h @@ -514,15 +514,15 @@ class internals_pp_manager { // internals_pp so that it can be pulled from the interpreter's state dict. That is // slow, so we use the current PyThreadState to check if it is necessary. auto *tstate = get_thread_state_unchecked(); - if (!tstate || tstate->interp != last_istate_.get()) { + if (!tstate || tstate->interp != last_istate_tls()) { gil_scoped_acquire_simple gil; if (!tstate) { tstate = get_thread_state_unchecked(); } - last_istate_ = tstate->interp; - internals_tls_p_ = get_or_create_pp_in_state_dict(); + last_istate_tls() = tstate->interp; + internals_p_tls() = get_or_create_pp_in_state_dict(); } - return internals_tls_p_.get(); + return internals_p_tls(); } #endif if (!internals_singleton_pp_) { @@ -536,8 +536,8 @@ class internals_pp_manager { void unref() { #ifdef PYBIND11_HAS_SUBINTERPRETER_SUPPORT if (get_num_interpreters_seen() > 1) { - last_istate_.reset(); - internals_tls_p_.reset(); + last_istate_tls() = nullptr; + internals_p_tls() = nullptr; return; } #endif @@ -549,8 +549,8 @@ class internals_pp_manager { if (get_num_interpreters_seen() > 1) { auto *tstate = get_thread_state_unchecked(); // this could be called without an active interpreter, just use what was cached - if (!tstate || tstate->interp == last_istate_.get()) { - auto tpp = internals_tls_p_.get(); + if (!tstate || tstate->interp == last_istate_tls()) { + auto tpp = internals_p_tls(); if (tpp) {
delete tpp; } @@ -589,12 +589,20 @@ class internals_pp_manager { return pp; } - char const *holder_id_ = nullptr; - on_fetch_function *on_fetch_ = nullptr; #ifdef PYBIND11_HAS_SUBINTERPRETER_SUPPORT - thread_specific_storage last_istate_; - thread_specific_storage> internals_tls_p_; + static PyInterpreterState *&last_istate_tls() { + static thread_local PyInterpreterState *last_istate = nullptr; + return last_istate; + } + + static std::unique_ptr *&internals_p_tls() { + static thread_local std::unique_ptr *internals_p = nullptr; + return internals_p; + } #endif + + char const *holder_id_ = nullptr; + on_fetch_function *on_fetch_ = nullptr; std::unique_ptr *internals_singleton_pp_; }; From 372d9946f5fbe87a8dd5fd17417c7bded6149831 Mon Sep 17 00:00:00 2001 From: b-pass Date: Sat, 13 Sep 2025 16:03:39 -0400 Subject: [PATCH 2/2] Make the pp manager a singleton. Strictly speaking, since the members are static, the instances must also be singletons or this wouldn't work. They already are, but we can make the class enforce it to be more 'self-documenting'. --- include/pybind11/detail/internals.h | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/include/pybind11/detail/internals.h b/include/pybind11/detail/internals.h index 712709e533..d23ee6ec91 100644 --- a/include/pybind11/detail/internals.h +++ b/include/pybind11/detail/internals.h @@ -502,8 +502,11 @@ template class internals_pp_manager { public: using on_fetch_function = void(InternalsType *); - internals_pp_manager(char const *id, on_fetch_function *on_fetch) - : holder_id_(id), on_fetch_(on_fetch) {} + + inline static internals_pp_manager &get_instance(char const *id, on_fetch_function *on_fetch) { + static internals_pp_manager instance(id, on_fetch); + return instance; + } /// Get the current pointer-to-pointer, allocating it if it does not already exist. May /// acquire the GIL. Will never return nullptr. 
@@ -564,6 +567,9 @@ class internals_pp_manager { } private: + internals_pp_manager(char const *id, on_fetch_function *on_fetch) + : holder_id_(id), on_fetch_(on_fetch) {} + std::unique_ptr *get_or_create_pp_in_state_dict() { error_scope err_scope; dict state_dict = get_python_state_dict(); @@ -632,10 +638,8 @@ inline internals_pp_manager &get_internals_pp_manager() { #else # define ON_FETCH_FN &check_internals_local_exception_translator #endif - static internals_pp_manager internals_pp_manager(PYBIND11_INTERNALS_ID, - ON_FETCH_FN); + return internals_pp_manager::get_instance(PYBIND11_INTERNALS_ID, ON_FETCH_FN); #undef ON_FETCH_FN - return internals_pp_manager; } /// Return a reference to the current `internals` data @@ -663,9 +667,7 @@ inline internals_pp_manager &get_local_internals_pp_manager() { static const std::string this_module_idstr = PYBIND11_MODULE_LOCAL_ID + std::to_string(reinterpret_cast(&this_module_idstr)); - static internals_pp_manager local_internals_pp_manager( - this_module_idstr.c_str(), nullptr); - return local_internals_pp_manager; + return internals_pp_manager::get_instance(this_module_idstr.c_str(), nullptr); } /// Works like `get_internals`, but for things which are locally registered.