Skip to content

Commit ed13d09

Browse files
authored
Shuffle profile round robin thread order before taking every sample (#41732)
Uses the O(n) "modern Fisher–Yates shuffle" (https://en.wikipedia.org/wiki/Fisher%E2%80%93Yates_shuffle#The_modern_algorithm). Adds a C buffer that stores the order in which Profile samples threads; this buffer is shuffled before every sample.
1 parent 4bcdf9d commit ed13d09

File tree

3 files changed

+34
-3
lines changed

3 files changed

+34
-3
lines changed

src/signal-handling.c

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,11 +25,15 @@ static volatile size_t bt_size_cur = 0;
2525
static volatile uint64_t nsecprof = 0;
2626
static volatile int running = 0;
2727
static const uint64_t GIGA = 1000000000ULL;
28+
static uint64_t profile_cong_rng_seed = 0;
29+
static uint64_t profile_cong_rng_unbias = 0;
30+
static volatile uint64_t *profile_round_robin_thread_order = NULL;
2831
// Timers to take samples at intervals
2932
JL_DLLEXPORT void jl_profile_stop_timer(void);
3033
JL_DLLEXPORT int jl_profile_start_timer(void);
3134
void jl_lock_profile(void);
3235
void jl_unlock_profile(void);
36+
void jl_shuffle_int_array_inplace(volatile uint64_t *carray, size_t size, uint64_t *seed);
3337

3438
JL_DLLEXPORT int jl_profile_is_buffer_full(void)
3539
{
@@ -288,13 +292,35 @@ JL_DLLEXPORT int jl_profile_init(size_t maxsize, uint64_t delay_nsec)
288292
nsecprof = delay_nsec;
289293
if (bt_data_prof != NULL)
290294
free((void*)bt_data_prof);
295+
if (profile_round_robin_thread_order == NULL) {
296+
// NOTE: We currently only allocate this once, since jl_n_threads cannot change
297+
// during execution of a julia process. If/when this invariant changes in the
298+
// future, this will have to be adjusted.
299+
profile_round_robin_thread_order = (uint64_t*) calloc(jl_n_threads, sizeof(uint64_t));
300+
for (int i = 0; i < jl_n_threads; i++) {
301+
profile_round_robin_thread_order[i] = i;
302+
}
303+
}
304+
seed_cong(&profile_cong_rng_seed);
305+
unbias_cong(jl_n_threads, &profile_cong_rng_unbias);
291306
bt_data_prof = (jl_bt_element_t*) calloc(maxsize, sizeof(jl_bt_element_t));
292307
if (bt_data_prof == NULL && maxsize > 0)
293308
return -1;
294309
bt_size_cur = 0;
295310
return 0;
296311
}
297312

313+
void jl_shuffle_int_array_inplace(volatile uint64_t *carray, size_t size, uint64_t *seed) {
314+
// The "modern Fisher–Yates shuffle" - O(n) algorithm
315+
// https://en.wikipedia.org/wiki/Fisher%E2%80%93Yates_shuffle#The_modern_algorithm
316+
for (size_t i = size - 1; i >= 1; --i) {
317+
size_t j = cong(i, profile_cong_rng_unbias, seed);
318+
uint64_t tmp = carray[j];
319+
carray[j] = carray[i];
320+
carray[i] = tmp;
321+
}
322+
}
323+
298324
JL_DLLEXPORT uint8_t *jl_profile_get_data(void)
299325
{
300326
return (uint8_t*) bt_data_prof;

src/signals-mach.c

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -524,7 +524,6 @@ static kern_return_t profiler_segv_handler
524524
void *mach_profile_listener(void *arg)
525525
{
526526
(void)arg;
527-
int i;
528527
const int max_size = 512;
529528
attach_exception_port(mach_thread_self(), 1);
530529
#ifdef LLVMLIBUNWIND
@@ -541,7 +540,10 @@ void *mach_profile_listener(void *arg)
541540
jl_lock_profile();
542541
void *unused = NULL;
543542
int keymgr_locked = _keymgr_get_and_lock_processwide_ptr_2(KEYMGR_GCC3_DW2_OBJ_LIST, &unused) == 0;
544-
for (i = jl_n_threads; i-- > 0; ) {
543+
jl_shuffle_int_array_inplace(profile_round_robin_thread_order, jl_n_threads, &profile_cong_rng_seed);
544+
for (int idx = jl_n_threads; idx-- > 0; ) {
545+
// Stop the threads in the random round-robin order.
546+
int i = profile_round_robin_thread_order[idx];
545547
// if there is no space left, break early
546548
if (jl_profile_is_buffer_full()) {
547549
jl_profile_stop_timer();

src/signals-unix.c

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -745,7 +745,10 @@ static void *signal_listener(void *arg)
745745
// (so that thread zero gets notified last)
746746
if (critical || profile)
747747
jl_lock_profile();
748-
for (int i = jl_n_threads; i-- > 0; ) {
748+
jl_shuffle_int_array_inplace(profile_round_robin_thread_order, jl_n_threads, &profile_cong_rng_seed);
749+
for (int idx = jl_n_threads; idx-- > 0; ) {
750+
// Stop the threads in the random round-robin order.
751+
int i = profile_round_robin_thread_order[idx];
749752
// notify thread to stop
750753
jl_thread_suspend_and_get_state(i, &signal_context);
751754

0 commit comments

Comments
 (0)