From 5eec36e869b9de8037665c3991e64f8eb6a24d59 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 30 Sep 2025 17:07:23 +0000 Subject: [PATCH 1/4] Initial plan From b65b37c330cccfec8bc13683d171e38d0caf9710 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 30 Sep 2025 17:16:43 +0000 Subject: [PATCH 2/4] Implement arena/bump allocation for message bodies Co-authored-by: gubatron <163977+gubatron@users.noreply.github.com> --- Cargo.toml | 3 ++- src/cloudllm/client_wrapper.rs | 6 +++--- src/cloudllm/clients/claude.rs | 2 +- src/cloudllm/clients/gemini.rs | 6 +++--- src/cloudllm/clients/grok.rs | 2 +- src/cloudllm/clients/openai.rs | 7 ++++--- src/cloudllm/llm_session.rs | 29 +++++++++++++++++++++++++---- 7 files changed, 39 insertions(+), 16 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index fb1dd08..da53025 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,4 +17,5 @@ openai-rust2 = { version = "1.6.0" } async-trait = "0.1.88" log = "0.4.27" -env_logger = "0.11.8" \ No newline at end of file +env_logger = "0.11.8" +bumpalo = "3.16" \ No newline at end of file diff --git a/src/cloudllm/client_wrapper.rs b/src/cloudllm/client_wrapper.rs index de8108d..87761e4 100644 --- a/src/cloudllm/client_wrapper.rs +++ b/src/cloudllm/client_wrapper.rs @@ -7,7 +7,7 @@ use openai_rust2 as openai_rust; /// and uses a ClientWrapper to interact with the LLM. // src/client_wrapper use std::error::Error; -use std::sync::Mutex; +use std::sync::{Arc, Mutex}; /// Represents the possible roles for a message. #[derive(Clone)] @@ -33,8 +33,8 @@ pub struct TokenUsage { pub struct Message { /// The role associated with the message. pub role: Role, - /// The actual content of the message. - pub content: String, + /// The actual content of the message stored as Arc to avoid clones. + pub content: Arc, } /// Trait defining the interface to interact with various LLM services. diff --git a/src/cloudllm/clients/claude.rs b/src/cloudllm/clients/claude.rs index e08682a..734f69e 100644 --- a/src/cloudllm/clients/claude.rs +++ b/src/cloudllm/clients/claude.rs @@ -119,7 +119,7 @@ pub fn test_claude_client() { error!("Error: {}", e); Message { role: Role::System, - content: format!("An error occurred: {:?}", e), + content: format!("An error occurred: {:?}", e).into(), } } } diff --git a/src/cloudllm/clients/gemini.rs b/src/cloudllm/clients/gemini.rs index 74e92e4..ea7b355 100644 --- a/src/cloudllm/clients/gemini.rs +++ b/src/cloudllm/clients/gemini.rs @@ -8,7 +8,7 @@ use openai_rust::chat; use openai_rust2 as openai_rust; use std::env; use std::error::Error; -use std::sync::Mutex; +use std::sync::{Arc, Mutex}; use tokio::runtime::Runtime; pub struct GeminiClient { @@ -197,7 +197,7 @@ impl ClientWrapper for GeminiClient { Role::User => "user".to_owned(), Role::Assistant => "assistant".to_owned(), }, - content: msg.content, + content: msg.content.to_string(), }) .collect(); @@ -216,7 +216,7 @@ impl ClientWrapper for GeminiClient { match result { Ok(content) => Ok(Message { role: Role::Assistant, - content, + content: Arc::from(content.as_str()), }), Err(err) => { error!("GeminiClient::send_message error: {}", err); diff --git a/src/cloudllm/clients/grok.rs b/src/cloudllm/clients/grok.rs index 9af32f2..c31493f 100644 --- a/src/cloudllm/clients/grok.rs +++ b/src/cloudllm/clients/grok.rs @@ -135,7 +135,7 @@ pub fn test_grok_client() { error!("Error: {}", e); Message { role: Role::System, - content: format!("An error occurred: {:?}", e), + content: format!("An error occurred: {:?}", e).into(), } } } diff --git a/src/cloudllm/clients/openai.rs b/src/cloudllm/clients/openai.rs index 6b70f7e..13e121b 100644 --- a/src/cloudllm/clients/openai.rs +++ b/src/cloudllm/clients/openai.rs @@ -43,6 +43,7 @@ use std::env; use std::error::Error; +use std::sync::Arc; use async_trait::async_trait; use log::{error, info}; @@ -160,7 +161,7 @@ impl ClientWrapper for OpenAIClient { Role::Assistant => "assistant".to_owned(), // Extend this match as new roles are added to the Role enum }, - content: msg.content, + content: msg.content.to_string(), }) .collect(); @@ -179,7 +180,7 @@ impl ClientWrapper for OpenAIClient { match result { Ok(c) => Ok(Message { role: Role::Assistant, - content: c, + content: Arc::from(c.as_str()), }), Err(_) => { error!( @@ -228,7 +229,7 @@ pub fn test_openai_client() { error!("Error: {}", e); Message { role: Role::System, - content: format!("An error occurred: {:?}", e), + content: Arc::from(format!("An error occurred: {:?}", e).as_str()), } } } diff --git a/src/cloudllm/llm_session.rs b/src/cloudllm/llm_session.rs index 24038a3..23f0983 100644 --- a/src/cloudllm/llm_session.rs +++ b/src/cloudllm/llm_session.rs @@ -53,6 +53,7 @@ use crate::client_wrapper; use std::sync::Arc; // src/llm_session.rs use crate::cloudllm::client_wrapper::{ClientWrapper, Message, Role}; +use bumpalo::Bump; use openai_rust2 as openai_rust; /// A conversation session with an LLM, including: @@ -65,6 +66,7 @@ use openai_rust2 as openai_rust; /// - `total_output_tokens`: sum of all completion tokens received so far. /// - `total_context_tokens`: shortcut for input + output totals. /// - `total_token_count`: total tokens used in the current session. +/// - `arena`: bump allocator for efficient message body allocation. pub struct LLMSession { client: Arc, system_prompt: Message, @@ -73,18 +75,25 @@ pub struct LLMSession { total_input_tokens: usize, total_output_tokens: usize, total_token_count: usize, + arena: Bump, } impl LLMSession { /// Creates a new `LLMSession` with the given client and system prompt. /// Initializes the conversation history and sets a default maximum token limit. pub fn new(client: Arc, system_prompt: String, max_tokens: usize) -> Self { + let arena = Bump::new(); + + // Allocate system prompt in arena and create Arc from it + let system_prompt_str = arena.alloc_str(&system_prompt); + let system_prompt_arc: Arc = Arc::from(system_prompt_str); + // Create the system prompt message let system_prompt_message = Message { role: Role::System, - content: system_prompt, + content: system_prompt_arc, }; - // Count tokens in the system prompt message + LLMSession { client, system_prompt: system_prompt_message, @@ -93,6 +102,7 @@ impl LLMSession { total_input_tokens: 0, total_output_tokens: 0, total_token_count: 0, + arena, } } @@ -112,7 +122,14 @@ impl LLMSession { content: String, optional_search_parameters: Option, ) -> Result> { - let message = Message { role, content }; + // Allocate message content in arena and create Arc + let content_str = self.arena.alloc_str(&content); + let content_arc: Arc = Arc::from(content_str); + + let message = Message { + role, + content: content_arc, + }; // Add the new message to the conversation history self.conversation_history.push(message); @@ -163,9 +180,13 @@ impl LLMSession { /// Sets a new system prompt for the session. /// Updates the token count accordingly. pub fn set_system_prompt(&mut self, prompt: String) { + // Allocate prompt in arena and create Arc + let prompt_str = self.arena.alloc_str(&prompt); + let prompt_arc: Arc = Arc::from(prompt_str); + self.system_prompt = Message { role: Role::System, - content: prompt, + content: prompt_arc, }; } From b620a7b923998ebac2be038304c608599a77deb3 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 30 Sep 2025 17:18:40 +0000 Subject: [PATCH 3/4] Add unit tests for arena allocation Co-authored-by: gubatron <163977+gubatron@users.noreply.github.com> --- src/cloudllm/llm_session.rs | 78 +++++++++++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) diff --git a/src/cloudllm/llm_session.rs b/src/cloudllm/llm_session.rs index 23f0983..3615e4b 100644 --- a/src/cloudllm/llm_session.rs +++ b/src/cloudllm/llm_session.rs @@ -211,6 +211,84 @@ impl LLMSession { } } +#[cfg(test)] +mod tests { + use super::*; + use std::sync::Arc; + + // Mock client for testing + struct MockClient; + + #[async_trait::async_trait] + impl ClientWrapper for MockClient { + async fn send_message( + &self, + _messages: Vec, + _optional_search_parameters: Option, + ) -> Result> { + Ok(Message { + role: Role::Assistant, + content: Arc::from("Mock response"), + }) + } + } + + #[tokio::test] + async fn test_arena_allocation() { + let client = Arc::new(MockClient); + let mut session = LLMSession::new( + client, + "Test system prompt".to_string(), + 1000, + ); + + // Send a message + let result = session.send_message( + Role::User, + "Test user message".to_string(), + None, + ).await; + + assert!(result.is_ok()); + let response = result.unwrap(); + assert_eq!(&*response.content, "Mock response"); + + // Verify system prompt is allocated correctly + assert_eq!(&*session.system_prompt.content, "Test system prompt"); + + // Verify conversation history + assert_eq!(session.conversation_history.len(), 2); // user message + assistant response + } + + #[test] + fn test_set_system_prompt() { + let client = Arc::new(MockClient); + let mut session = LLMSession::new( + client, + "Initial prompt".to_string(), + 1000, + ); + + // Change system prompt + session.set_system_prompt("Updated prompt".to_string()); + assert_eq!(&*session.system_prompt.content, "Updated prompt"); + } + + #[test] + fn test_message_content_is_arc_str() { + // Verify that Message.content is Arc and cloning is cheap + let msg = Message { + role: Role::User, + content: Arc::from("Test message"), + }; + + let cloned = msg.clone(); + + // Arc::ptr_eq checks if both Arcs point to the same allocation + assert!(Arc::ptr_eq(&msg.content, &cloned.content)); + } +} + /// Estimates the number of tokens in a string. /// Uses an approximate formula: one token per 4 characters. fn estimate_token_count(text: &str) -> usize { From 7160657a964bbde960f18b6df6146759716a35e9 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 1 Oct 2025 22:01:11 +0000 Subject: [PATCH 4/4] Move arena allocation tests to external test file Co-authored-by: gubatron <163977+gubatron@users.noreply.github.com> --- src/cloudllm/client_wrapper.rs | 1 + src/cloudllm/llm_session.rs | 78 +----------------- tests/client_tests.rs | 6 +- tests/llm_session_bump_allocations_test.rs | 94 ++++++++++++++++++++++ tests/llm_session_tests.rs | 8 +- 5 files changed, 104 insertions(+), 83 deletions(-) create mode 100644 tests/llm_session_bump_allocations_test.rs diff --git a/src/cloudllm/client_wrapper.rs b/src/cloudllm/client_wrapper.rs index 6a9fb4f..b457b19 100644 --- a/src/cloudllm/client_wrapper.rs +++ b/src/cloudllm/client_wrapper.rs @@ -7,6 +7,7 @@ use openai_rust2 as openai_rust; /// and uses a ClientWrapper to interact with the LLM. // src/client_wrapper use std::error::Error; +use std::sync::Arc; use tokio::sync::Mutex; /// Represents the possible roles for a message. diff --git a/src/cloudllm/llm_session.rs b/src/cloudllm/llm_session.rs index bccd326..ff53845 100644 --- a/src/cloudllm/llm_session.rs +++ b/src/cloudllm/llm_session.rs @@ -217,83 +217,9 @@ impl LLMSession { pub fn get_cached_token_counts(&self) -> &Vec { &self.cached_token_counts } -} - -#[cfg(test)] -mod tests { - use super::*; - use std::sync::Arc; - - // Mock client for testing - struct MockClient; - - #[async_trait::async_trait] - impl ClientWrapper for MockClient { - async fn send_message( - &self, - _messages: Vec, - _optional_search_parameters: Option, - ) -> Result> { - Ok(Message { - role: Role::Assistant, - content: Arc::from("Mock response"), - }) - } - } - - #[tokio::test] - async fn test_arena_allocation() { - let client = Arc::new(MockClient); - let mut session = LLMSession::new( - client, - "Test system prompt".to_string(), - 1000, - ); - - // Send a message - let result = session.send_message( - Role::User, - "Test user message".to_string(), - None, - ).await; - - assert!(result.is_ok()); - let response = result.unwrap(); - assert_eq!(&*response.content, "Mock response"); - // Verify system prompt is allocated correctly - assert_eq!(&*session.system_prompt.content, "Test system prompt"); - - // Verify conversation history - assert_eq!(session.conversation_history.len(), 2); // user message + assistant response - } - - #[test] - fn test_set_system_prompt() { - let client = Arc::new(MockClient); - let mut session = LLMSession::new( - client, - "Initial prompt".to_string(), - 1000, - ); - - // Change system prompt - session.set_system_prompt("Updated prompt".to_string()); - assert_eq!(&*session.system_prompt.content, "Updated prompt"); - } - - #[test] - fn test_message_content_is_arc_str() { - // Verify that Message.content is Arc and cloning is cheap - let msg = Message { - role: Role::User, - content: Arc::from("Test message"), - }; - - let cloned = msg.clone(); - - // Arc::ptr_eq checks if both Arcs point to the same allocation - assert!(Arc::ptr_eq(&msg.content, &cloned.content)); + pub fn get_system_prompt(&self) -> &Message { + &self.system_prompt } } diff --git a/tests/client_tests.rs b/tests/client_tests.rs index b2224ae..9bf0d83 100644 --- a/tests/client_tests.rs +++ b/tests/client_tests.rs @@ -43,7 +43,7 @@ fn test_claude_client() { log::error!("Error: {}", e); Message { role: System, - content: format!("An error occurred: {:?}", e), + content: format!("An error occurred: {:?}", e).into(), } }) }); @@ -129,7 +129,7 @@ pub fn test_grok_client() { log::error!("Error: {}", e); Message { role: crate::Role::System, - content: format!("An error occurred: {:?}", e), + content: format!("An error occurred: {:?}", e).into(), } }) }); @@ -168,7 +168,7 @@ fn test_openai_client() { log::error!("Error: {}", e); Message { role: Role::System, - content: format!("An error occurred: {:?}", e), + content: format!("An error occurred: {:?}", e).into(), } }) }); diff --git a/tests/llm_session_bump_allocations_test.rs b/tests/llm_session_bump_allocations_test.rs new file mode 100644 index 0000000..8488eca --- /dev/null +++ b/tests/llm_session_bump_allocations_test.rs @@ -0,0 +1,94 @@ +use cloudllm::client_wrapper::{ClientWrapper, Message, Role, TokenUsage}; +use cloudllm::LLMSession; +use async_trait::async_trait; +use openai_rust2 as openai_rust; +use std::sync::Arc; +use tokio::sync::Mutex; + +// Mock client for testing +struct MockClient { + usage: Mutex>, + response_content: String, +} + +impl MockClient { + fn new(response_content: String) -> Self { + Self { + usage: Mutex::new(None), + response_content, + } + } +} + +#[async_trait] +impl ClientWrapper for MockClient { + async fn send_message( + &self, + _messages: &[Message], + _optional_search_parameters: Option, + ) -> Result> { + Ok(Message { + role: Role::Assistant, + content: self.response_content.clone().into(), + }) + } + + fn usage_slot(&self) -> Option<&Mutex>> { + Some(&self.usage) + } +} + +#[tokio::test] +async fn test_arena_allocation() { + let mock_client = Arc::new(MockClient::new("Mock response".to_string())); + let mut session = LLMSession::new( + mock_client, + "Test system prompt".to_string(), + 1000, + ); + + // Send a message + let result = session.send_message( + Role::User, + "Test user message".to_string(), + None, + ).await; + + assert!(result.is_ok()); + let response = result.unwrap(); + assert_eq!(&*response.content, "Mock response"); + + // Verify system prompt is allocated correctly + assert_eq!(&*session.get_system_prompt().content, "Test system prompt"); + + // Verify conversation history + assert_eq!(session.get_conversation_history().len(), 2); // user message + assistant response +} + +#[test] +fn test_set_system_prompt() { + let mock_client = Arc::new(MockClient::new("Response".to_string())); + let mut session = LLMSession::new( + mock_client, + "Initial prompt".to_string(), + 1000, + ); + + // Change system prompt + session.set_system_prompt("Updated prompt".to_string()); + assert_eq!(&*session.get_system_prompt().content, "Updated prompt"); +} + +#[test] +fn test_message_content_is_arc_str() { + // Verify that Message.content is Arc and cloning is cheap + let msg = Message { + role: Role::User, + content: Arc::from("Test message"), + }; + + let cloned = msg.clone(); + + // Arc::ptr_eq checks if both Arcs point to the same allocation + assert!(Arc::ptr_eq(&msg.content, &cloned.content)); +} diff --git a/tests/llm_session_tests.rs b/tests/llm_session_tests.rs index 9e4b272..1e8c46b 100644 --- a/tests/llm_session_tests.rs +++ b/tests/llm_session_tests.rs @@ -41,7 +41,7 @@ impl ClientWrapper for MockClient { ) -> Result> { Ok(Message { role: Role::Assistant, - content: self.response_content.clone(), + content: self.response_content.clone().into(), }) } @@ -70,11 +70,11 @@ async fn test_token_caching() { // Verify token counts are cached correctly let expected_user_tokens = llm_session::estimate_message_token_count(&Message { role: Role::User, - content: user_message.to_string(), + content: user_message.to_string().into(), }); let expected_response_tokens = llm_session::estimate_message_token_count(&Message { role: Role::Assistant, - content: "Response".to_string(), + content: "Response".to_string().into(), }); assert_eq!(session.get_cached_token_counts()[0], expected_user_tokens); @@ -126,7 +126,7 @@ fn test_estimate_token_count() { fn test_estimate_message_token_count() { let message = Message { role: Role::User, - content: "test message".to_string(), + content: "test message".to_string().into(), }; // "test message" = 12 characters = 3 tokens + 1 role token = 4 tokens assert_eq!(llm_session::estimate_message_token_count(&message), 4);