diff --git a/packages/ucache_bench/run.py b/packages/ucache_bench/run.py index d0624163..a283bf5f 100644 --- a/packages/ucache_bench/run.py +++ b/packages/ucache_bench/run.py @@ -235,6 +235,10 @@ def run_server(args: argparse.Namespace) -> None: server_cmd.append( f"--rpc_num_cpu_worker_threads={args.rpc_num_cpu_worker_threads}" ) + if args.rpc_socket_max_reads_per_event != 1: + server_cmd.append( + f"--rpc_socket_max_reads_per_event={args.rpc_socket_max_reads_per_event}" + ) # CPU pinning configuration if args.cpu_pinning_enabled: @@ -461,6 +465,12 @@ def init_parser() -> argparse.ArgumentParser: default=1, help="Number of CPU worker threads for ThriftServer", ) + server_parser.add_argument( + "--rpc-socket-max-reads-per-event", + type=int, + default=1, + help="Max reads per socket per event loop iteration (production uses 1, ThriftServer default is 16)", + ) # CPU pinning configuration server_parser.add_argument( diff --git a/packages/ucache_bench/server/UcacheBenchRpcServer.cpp b/packages/ucache_bench/server/UcacheBenchRpcServer.cpp index 941164d6..559964fe 100644 --- a/packages/ucache_bench/server/UcacheBenchRpcServer.cpp +++ b/packages/ucache_bench/server/UcacheBenchRpcServer.cpp @@ -27,6 +27,12 @@ DEFINE_uint32( 1, "Number of CPU worker threads for ThriftServer. " "Production ucache uses 1. These handle CPU-bound work separate from IO"); +DEFINE_uint32( + rpc_socket_max_reads_per_event, + 1, + "Max reads per socket per event loop iteration. " + "Production ucache uses 1. ThriftServer default is 16. " + "Higher values let a single connection deliver more requests per epoll wakeup"); // CPU pinning configuration flags DEFINE_bool( @@ -163,7 +169,8 @@ apache::thrift::ThriftServer& UcacheBenchRpcServer::addThriftServer() { // Prevent single connection from monopolizing an IO thread's event loop. // Without this, a few hot connections can starve others, limiting // multi-client scalability. - thriftServer_->setSocketMaxReadsPerEvent(1); + thriftServer_->setSocketMaxReadsPerEvent( + FLAGS_rpc_socket_max_reads_per_event); // Disable timeouts — let clients control timing, same as production ucache. thriftServer_->setQueueTimeout(std::chrono::milliseconds(0)); @@ -176,8 +183,10 @@ apache::thrift::ThriftServer& UcacheBenchRpcServer::addThriftServer() { thriftServer_->disableActiveRequestsTracking(); XLOG(INFO) << "ThriftServer configured with " - << FLAGS_rpc_num_cpu_worker_threads << " CPU worker threads and " - << numAcceptorThreads << " acceptor threads"; + << FLAGS_rpc_num_cpu_worker_threads << " CPU worker threads, " + << numAcceptorThreads << " acceptor threads, " + << "socketMaxReadsPerEvent=" + << FLAGS_rpc_socket_max_reads_per_event; return *thriftServer_; } diff --git a/packages/ucache_bench/server/UcacheBenchServer.cpp b/packages/ucache_bench/server/UcacheBenchServer.cpp index 65fb2cb0..6ea0f9c7 100644 --- a/packages/ucache_bench/server/UcacheBenchServer.cpp +++ b/packages/ucache_bench/server/UcacheBenchServer.cpp @@ -45,6 +45,11 @@ void UcacheBenchServer::setupCacheLib() { cacheConfig.setAccessConfig( {config_.hash_power, config_.hashtable_lock_power}); + // Configure number of CacheLib shards if specified + if (config_.cachelib_num_shards > 0) { + cacheConfig.setNumShards(config_.cachelib_num_shards); + } + // Generate alloc sizes (factor 1.25, min allocation size) // This provides a good distribution of allocation classes for cache items // Max alloc size increased to 64KB to support production traffic distribution