From 45ff6c6fd9227f687a4f87c135904115c645a791 Mon Sep 17 00:00:00 2001 From: Yupeng Tang Date: Tue, 31 Mar 2026 12:53:54 -0700 Subject: [PATCH 1/2] Apply cachelib_num_shards config to CacheAllocator (#518) Summary: Pull Request resolved: https://github.com/facebookresearch/DCPerf/pull/518 The cachelib_num_shards parameter was parsed from gflags and stored in UcacheBenchConfig but never actually applied to the CacheAllocator::Config. This meant the config value was silently ignored and CacheLib used its default of 8192 shards. Now call setNumShards() when cachelib_num_shards > 0, allowing the benchmark to match production shard counts for more accurate CPU utilization profiling. Differential Revision: D96087814 --- packages/ucache_bench/server/UcacheBenchServer.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/packages/ucache_bench/server/UcacheBenchServer.cpp b/packages/ucache_bench/server/UcacheBenchServer.cpp index 65fb2cb0..6ea0f9c7 100644 --- a/packages/ucache_bench/server/UcacheBenchServer.cpp +++ b/packages/ucache_bench/server/UcacheBenchServer.cpp @@ -45,6 +45,11 @@ void UcacheBenchServer::setupCacheLib() { cacheConfig.setAccessConfig( {config_.hash_power, config_.hashtable_lock_power}); + // Configure number of CacheLib shards if specified + if (config_.cachelib_num_shards > 0) { + cacheConfig.setNumShards(config_.cachelib_num_shards); + } + // Generate alloc sizes (factor 1.25, min allocation size) // This provides a good distribution of allocation classes for cache items // Max alloc size increased to 64KB to support production traffic distribution From ab49b147edb457c8aa1773fc33331471f29ee9f1 Mon Sep 17 00:00:00 2001 From: Yupeng Tang Date: Tue, 31 Mar 2026 12:58:56 -0700 Subject: [PATCH 2/2] Add configurable rpc_socket_max_reads_per_event parameter (#526) Summary: Pull Request resolved: https://github.com/facebookresearch/DCPerf/pull/526 Add support for configuring ThriftServer's socketMaxReadsPerEvent via CLI flag. This controls how many reads a single connection can perform per event loop iteration, which affects multi-client scalability. Changes: - Add rpc_socket_max_reads_per_event gflag to UcacheBenchRpcServer.cpp - Apply flag value to thriftServer_->setSocketMaxReadsPerEvent() - Add parameter to benchmark configs (debug/large/medium/small) with default value of 1 matching production ucache - Add --rpc-socket-max-reads-per-event CLI arg in jobs_internal.yml - Add parameter to ALLOWED_PARAMS in ucache_bench_benchmark.py Reviewed By: excelle08 Differential Revision: D96763733 --- packages/ucache_bench/run.py | 10 ++++++++++ .../ucache_bench/server/UcacheBenchRpcServer.cpp | 15 ++++++++++++--- 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/packages/ucache_bench/run.py b/packages/ucache_bench/run.py index d0624163..a283bf5f 100644 --- a/packages/ucache_bench/run.py +++ b/packages/ucache_bench/run.py @@ -235,6 +235,10 @@ def run_server(args: argparse.Namespace) -> None: server_cmd.append( f"--rpc_num_cpu_worker_threads={args.rpc_num_cpu_worker_threads}" ) + if args.rpc_socket_max_reads_per_event != 1: + server_cmd.append( + f"--rpc_socket_max_reads_per_event={args.rpc_socket_max_reads_per_event}" + ) # CPU pinning configuration if args.cpu_pinning_enabled: @@ -461,6 +465,12 @@ def init_parser() -> argparse.ArgumentParser: default=1, help="Number of CPU worker threads for ThriftServer", ) + server_parser.add_argument( + "--rpc-socket-max-reads-per-event", + type=int, + default=1, + help="Max reads per socket per event loop iteration (production uses 1, ThriftServer default is 16)", + ) # CPU pinning configuration server_parser.add_argument( diff --git a/packages/ucache_bench/server/UcacheBenchRpcServer.cpp b/packages/ucache_bench/server/UcacheBenchRpcServer.cpp index 941164d6..559964fe 100644 --- a/packages/ucache_bench/server/UcacheBenchRpcServer.cpp +++ b/packages/ucache_bench/server/UcacheBenchRpcServer.cpp @@ -27,6 +27,12 @@ DEFINE_uint32( 1, "Number of CPU worker threads for ThriftServer. " "Production ucache uses 1. These handle CPU-bound work separate from IO"); +DEFINE_uint32( + rpc_socket_max_reads_per_event, + 1, + "Max reads per socket per event loop iteration. " + "Production ucache uses 1. ThriftServer default is 16. " + "Higher values let a single connection deliver more requests per epoll wakeup"); // CPU pinning configuration flags DEFINE_bool( @@ -163,7 +169,8 @@ apache::thrift::ThriftServer& UcacheBenchRpcServer::addThriftServer() { // Prevent single connection from monopolizing an IO thread's event loop. // Without this, a few hot connections can starve others, limiting // multi-client scalability. - thriftServer_->setSocketMaxReadsPerEvent(1); + thriftServer_->setSocketMaxReadsPerEvent( + FLAGS_rpc_socket_max_reads_per_event); // Disable timeouts — let clients control timing, same as production ucache. thriftServer_->setQueueTimeout(std::chrono::milliseconds(0)); @@ -176,8 +183,10 @@ apache::thrift::ThriftServer& UcacheBenchRpcServer::addThriftServer() { thriftServer_->disableActiveRequestsTracking(); XLOG(INFO) << "ThriftServer configured with " - << FLAGS_rpc_num_cpu_worker_threads << " CPU worker threads and " - << numAcceptorThreads << " acceptor threads"; + << FLAGS_rpc_num_cpu_worker_threads << " CPU worker threads, " + << numAcceptorThreads << " acceptor threads, " + << "socketMaxReadsPerEvent=" + << FLAGS_rpc_socket_max_reads_per_event; return *thriftServer_; }