-
Notifications
You must be signed in to change notification settings - Fork 241
Add per-thread CPU stats reporting in JSON output #346
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
bdb495a
82b5a3c
92d4f43
5abb114
bff2e9c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -75,10 +75,15 @@ | |
| #include <atomic> | ||
| #include <algorithm> | ||
|
|
||
| #ifdef __APPLE__ | ||
| #include <mach/mach.h> | ||
| #endif | ||
|
|
||
| #include "client.h" | ||
| #include "JSON_handler.h" | ||
| #include "obj_gen.h" | ||
| #include "memtier_benchmark.h" | ||
| #include "run_stats_types.h" | ||
| #include "statsd.h" | ||
|
|
||
|
|
||
|
|
@@ -1749,6 +1754,23 @@ static void print_all_threads_stack_trace(FILE *fp, int pid, const char *timestr | |
| } | ||
| } | ||
|
|
||
// Returns the CPU time consumed so far by `thread`, in microseconds.
// Every failure path returns 0, so a caller cannot tell a failed query apart
// from a genuine zero reading.
| static unsigned long long get_thread_cpu_usec(pthread_t thread) { | ||
| #ifdef __APPLE__ | ||
// Apple build: query the Mach thread port behind the pthread handle.
| mach_port_t mt = pthread_mach_thread_np(thread); | ||
| thread_basic_info_data_t info; | ||
| mach_msg_type_number_t count = THREAD_BASIC_INFO_COUNT; | ||
| if (thread_info(mt, THREAD_BASIC_INFO, (thread_info_t)&info, &count) != KERN_SUCCESS) return 0; | ||
// Sum user and system time; the seconds fields are widened before scaling to microseconds.
| return (unsigned long long)info.user_time.seconds * 1000000 + info.user_time.microseconds | ||
| + (unsigned long long)info.system_time.seconds * 1000000 + info.system_time.microseconds; | ||
| #else | ||
// Other platforms: read the thread's CPU-time clock obtained via pthread_getcpuclockid().
| clockid_t cid; | ||
| if (pthread_getcpuclockid(thread, &cid) != 0) return 0; | ||
| struct timespec ts; | ||
| if (clock_gettime(cid, &ts) != 0) return 0; | ||
// Convert seconds + nanoseconds to microseconds (sub-microsecond remainder is truncated).
| return (unsigned long long)ts.tv_sec * 1000000 + ts.tv_nsec / 1000; | ||
| #endif | ||
| } | ||
|
|
||
| run_stats run_benchmark(int run_id, benchmark_config *cfg, object_generator *obj_gen) | ||
| { | ||
| fprintf(stderr, "[RUN #%u] Preparing benchmark client...\n", run_id); | ||
|
|
@@ -1789,6 +1811,17 @@ run_stats run_benchmark(int run_id, benchmark_config *cfg, object_generator *obj | |
| unsigned long int cur_ops_sec = 0; | ||
| unsigned long int cur_bytes_sec = 0; | ||
|
|
||
| // CPU usage tracking: sample initial CPU times for main thread and all worker threads | ||
| std::vector<per_second_cpu_stats> cpu_history; | ||
| unsigned long long main_prev_cpu = get_thread_cpu_usec(pthread_self()); | ||
| std::vector<unsigned long long> thread_prev_cpu(threads.size()); | ||
| for (size_t t = 0; t < threads.size(); t++) { | ||
| thread_prev_cpu[t] = get_thread_cpu_usec(threads[t]->m_thread); | ||
| } | ||
| unsigned int cpu_second = 0; | ||
| struct timeval cpu_prev_tv; | ||
| gettimeofday(&cpu_prev_tv, NULL); | ||
|
|
||
| // provide some feedback... | ||
| // NOTE: Reading stats from worker threads without synchronization is a benign race. | ||
| // These stats are only for progress display and are approximate. Final results are | ||
|
|
@@ -1901,6 +1934,36 @@ run_stats run_benchmark(int run_id, benchmark_config *cfg, object_generator *obj | |
| cur_ops_sec, ops_sec, cur_bytes_str, bytes_str, cur_latency, avg_latency); | ||
| } | ||
|
|
||
| // Collect per-second CPU usage for main thread and all worker threads | ||
| cpu_second++; | ||
| per_second_cpu_stats cpu_snap; | ||
| cpu_snap.m_second = cpu_second; | ||
|
|
||
| struct timeval cpu_cur_tv; | ||
| gettimeofday(&cpu_cur_tv, NULL); | ||
| double wall_usec = (double)(cpu_cur_tv.tv_sec - cpu_prev_tv.tv_sec) * 1000000.0 | ||
| + (double)(cpu_cur_tv.tv_usec - cpu_prev_tv.tv_usec); | ||
| if (wall_usec < 1.0) wall_usec = 1.0; // guard against division by zero | ||
|
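There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Wall clock adjustment causes incorrect CPU percentages (Low Severity): If the system clock is adjusted backwards (via NTP, daylight saving, or manual change) during benchmark execution, the `gettimeofday`-based `wall_usec` delta can become negative; the `wall_usec < 1.0` guard then clamps it to 1.0, so the CPU percentages computed for that interval are grossly inflated. |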
||
|
|
||
| unsigned long long main_cur_cpu = get_thread_cpu_usec(pthread_self()); | ||
| unsigned long long main_delta = (main_cur_cpu > main_prev_cpu) ? main_cur_cpu - main_prev_cpu : 0; | ||
| cpu_snap.m_main_thread_cpu_pct = (double)main_delta / wall_usec * 100.0; | ||
| main_prev_cpu = main_cur_cpu; | ||
|
|
||
| for (size_t t = 0; t < threads.size(); t++) { | ||
| unsigned long long cur_cpu = get_thread_cpu_usec(threads[t]->m_thread); | ||
| unsigned long long delta = (cur_cpu > thread_prev_cpu[t]) ? cur_cpu - thread_prev_cpu[t] : 0; | ||
| double cpu_pct = (double)delta / wall_usec * 100.0; | ||
| cpu_snap.m_thread_cpu_pct.push_back(cpu_pct); | ||
| thread_prev_cpu[t] = cur_cpu; | ||
|
cursor[bot] marked this conversation as resolved.
|
||
|
|
||
| if (cpu_pct > 95.0) { | ||
| fprintf(stderr, "\nWARNING: High CPU on thread %zu: %.1f%% - results may be unreliable\n", t, cpu_pct); | ||
| } | ||
| } | ||
| cpu_prev_tv = cpu_cur_tv; | ||
| cpu_history.push_back(cpu_snap); | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. CPU stats break across thread restarts (Medium Severity): Per-thread CPU tracking keeps … [comment truncated in this capture] |
||
|
|
||
| // Send metrics to StatsD if configured | ||
| if (cfg->statsd != NULL && cfg->statsd->is_enabled()) { | ||
| cfg->statsd->gauge("ops_sec", (long) cur_ops_sec); | ||
|
|
@@ -1983,6 +2046,8 @@ run_stats run_benchmark(int run_id, benchmark_config *cfg, object_generator *obj | |
| (*i)->m_cg->merge_run_stats(&stats); | ||
| } | ||
|
|
||
| stats.set_cpu_stats(std::move(cpu_history)); | ||
|
|
||
| // Do we need to produce client stats? | ||
| if (cfg->client_stats != NULL) { | ||
| unsigned int cg_id = 0; | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1389,6 +1389,11 @@ void run_stats::print_kb_sec_column(output_table &table, const std::vector<aggre | |
| table.add_column(column); | ||
| } | ||
|
|
||
| void run_stats::set_cpu_stats(std::vector<per_second_cpu_stats> cpu_stats) | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. CPU stats missing from aggregated results (Low Severity): When … [comment truncated in this capture] Additional Locations (1) |
||
| { | ||
| m_cpu_stats = std::move(cpu_stats); | ||
| } | ||
|
|
||
| void run_stats::print_json(json_handler *jsonhandler, arbitrary_command_list &command_list, bool cluster_mode, | ||
| const std::vector<aggregated_command_type_stats> *aggregated) | ||
| { | ||
|
|
@@ -1480,6 +1485,24 @@ void run_stats::print_json(json_handler *jsonhandler, arbitrary_command_list &co | |
| m_totals.m_total_latency, m_totals.m_ops, m_totals.m_connection_errors_sec, | ||
| m_totals.m_connection_errors, quantiles_list, m_totals.latency_histogram, timestamps, | ||
| total_stats); | ||
|
|
||
| if (jsonhandler != NULL && !m_cpu_stats.empty()) { | ||
| jsonhandler->open_nesting("CPU Stats"); | ||
| for (size_t i = 0; i < m_cpu_stats.size(); i++) { | ||
| const per_second_cpu_stats &cs = m_cpu_stats[i]; | ||
| char sec_str[32]; | ||
| snprintf(sec_str, sizeof(sec_str), "%u", cs.m_second); | ||
| jsonhandler->open_nesting(sec_str); | ||
| jsonhandler->write_obj("Main Thread", "%.2f", cs.m_main_thread_cpu_pct); | ||
| for (size_t t = 0; t < cs.m_thread_cpu_pct.size(); t++) { | ||
| char thread_name[32]; | ||
| snprintf(thread_name, sizeof(thread_name), "Thread %zu", t); | ||
| jsonhandler->write_obj(thread_name, "%.2f", cs.m_thread_cpu_pct[t]); | ||
| } | ||
| jsonhandler->close_nesting(); | ||
| } | ||
| jsonhandler->close_nesting(); | ||
| } | ||
| } | ||
|
|
||
| void run_stats::print_histogram(FILE *out, json_handler *jsonhandler, arbitrary_command_list &command_list, | ||
|
|
||


There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Uninitialized pthread handle used for CPU measurement
High Severity
The code reads `threads[t]->m_thread` immediately after calling `start()` without verifying that thread creation succeeded. If `pthread_create` fails, `m_thread` remains uninitialized and passing it to `get_thread_cpu_usec` causes undefined behavior (potential crash when calling `pthread_mach_thread_np` on macOS or `pthread_getcpuclockid` on Linux with garbage `pthread_t` values).
Additional Locations (1)
memtier_benchmark.cpp#L1896-L1897