diff --git a/.gitignore b/.gitignore
index 3579bb0..7e45607 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,7 +1,7 @@
 */target
 */*/target/
-*/pkg/*.wasm
-*/pkg/ui
+**/pkg/*.wasm
+**/pkg/ui
 */*.swp
 */*.swo
 */*/wasi_snapshot_preview1.wasm
diff --git a/Cargo.lock b/Cargo.lock
index 374f608..f9d8f42 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1145,7 +1145,7 @@ dependencies = [
  "anyhow",
  "bincode",
  "kinode_process_lib",
- "llm_interface",
+ "llm_interface 0.1.0 (git+https://github.com/kinode-dao/llm/?branch=interface)",
  "multipart",
  "serde",
  "serde_json",
@@ -1176,6 +1176,17 @@ version = "0.4.13"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "01cda141df6706de531b6c46c3a33ecca755538219bd484262fa09410c13539c"
 
+[[package]]
+name = "llm_interface"
+version = "0.1.0"
+source = "git+https://github.com/kinode-dao/llm/?branch=hf/interface#d517de166c761139c1c52bd403bfc27f3a046429"
+dependencies = [
+ "anyhow",
+ "derive_builder",
+ "serde",
+ "serde_json",
+]
+
 [[package]]
 name = "llm_interface"
 version = "0.1.0"
@@ -1381,7 +1392,7 @@ dependencies = [
  "anyhow",
  "bincode",
  "kinode_process_lib",
- "llm_interface",
+ "llm_interface 0.1.0 (git+https://github.com/kinode-dao/llm/?branch=hf/interface)",
  "multipart",
  "serde",
  "serde_json",
diff --git a/README.md b/README.md
index 6cc70bb..a071403 100644
--- a/README.md
+++ b/README.md
@@ -46,7 +46,21 @@ Run the tester script in your fakenode:
 lccp_tester:llm:kinode
 ```
 
-Within the tester, you can see how different requests and responses are handled.
+Within the tester, you can see how different requests and responses are handled.
+
+### Running local LLMs with Llamafile
+
+Llamafile serves a local model behind an OpenAI-compatible API (http://127.0.0.1:8080/v1 by default). Register that endpoint, then chat:
+
+https://github.com/Mozilla-Ocho/llamafile
+
+```
+m our@openai:openai:appattacc.os '{"RegisterOaiProviderEndpoint": {"endpoint": "http://127.0.0.1:8080/v1"}}'
+
+m our@openai:openai:appattacc.os '{"OaiProviderChat": {"model": "", "messages": [{"role": "user", "content": "Suggest a Shakespeare play for me to read. Be concise."}]}}' -a 60
+
+kit i openai:openai:appattacc.os '{"OaiProviderChat": {"model": "", "messages": [{"role": "user", "content": "Suggest a Shakespeare play for me to read."}]}}' -p 8081
+```
 
 ## Online APIs
 
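For reference, the `m` commands in the README hunk above boil down to a plain `Request` carrying externally tagged JSON to the `openai:openai:appattacc.os` process. A minimal client-side sketch of the same call from another Kinode process follows; the helper name, message text, and the use of `send_and_await_response` are illustrative assumptions, not part of this change:

```
// Hypothetical client sketch: sends the same OaiProviderChat JSON the
// `m` command above sends, then waits for the reply.
use kinode_process_lib::{println, Address, ProcessId, Request};
use serde_json::json;

fn ask_local_llm() -> anyhow::Result<()> {
    // Same target the `m` commands address: our node's openai process.
    let target = Address::new(
        "our",
        ProcessId::new(Some("openai"), "openai", "appattacc.os"),
    );
    let body = json!({
        "OaiProviderChat": {
            // A llamafile serves a single model, so the name can stay empty.
            "model": "",
            "messages": [{"role": "user", "content": "Suggest a Shakespeare play."}]
        }
    });
    let response = Request::new()
        .target(target)
        .body(serde_json::to_vec(&body)?)
        .send_and_await_response(60)??; // mirror the 60s provider timeout
    println!("{}", String::from_utf8_lossy(response.body()));
    Ok(())
}
```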
diff --git a/openai/openai/Cargo.toml b/openai/openai/Cargo.toml
index 05cd3c5..f906325 100644
--- a/openai/openai/Cargo.toml
+++ b/openai/openai/Cargo.toml
@@ -7,11 +7,11 @@
 anyhow = "1.0"
 bincode = "1.3.3"
 kinode_process_lib = { git = "https://github.com/kinode-dao/process_lib", tag = "v0.6.0" }
+llm_interface = { git = "https://github.com/kinode-dao/llm/", branch = "hf/interface" }
 multipart = "0.18.0"
 serde = { version = "1.0", features = ["derive"] }
 serde_json = "1.0"
 wit-bindgen = { git = "https://github.com/bytecodealliance/wit-bindgen", rev = "21a46c7" }
-llm_interface = { git = "https://github.com/kinode-dao/llm/", branch = "interface" }
 
 [lib]
 crate-type = ["cdylib"]
diff --git a/openai/openai/src/helpers.rs b/openai/openai/src/helpers.rs
index 1df8504..3fe3751 100644
--- a/openai/openai/src/helpers.rs
+++ b/openai/openai/src/helpers.rs
@@ -9,6 +9,8 @@ pub const EMBEDDING_CONTEXT: u8 = 1;
 pub const OPENAI_CHAT_CONTEXT: u8 = 2;
 pub const GROQ_CHAT_CONTEXT: u8 = 3;
 pub const CHAT_IMAGE_CONTEXT: u8 = 4;
+pub const REGISTER_ENDPOINT_CONTEXT: u8 = 5;
+pub const OAI_PROVIDER_CHAT_CONTEXT: u8 = 6;
 
 // TODO: Zena: We should probably derive this through a trait at some point?
 pub fn request_to_context(request: &LLMRequest) -> u8 {
@@ -18,6 +20,8 @@ pub fn request_to_context(request: &LLMRequest) -> u8 {
         LLMRequest::OpenaiChat(_) => OPENAI_CHAT_CONTEXT,
         LLMRequest::GroqChat(_) => GROQ_CHAT_CONTEXT,
         LLMRequest::ChatImage(_) => CHAT_IMAGE_CONTEXT,
+        LLMRequest::RegisterOaiProviderEndpoint(_) => REGISTER_ENDPOINT_CONTEXT,
+        LLMRequest::OaiProviderChat(_) => OAI_PROVIDER_CHAT_CONTEXT,
     }
 }
 
diff --git a/openai/openai/src/lib.rs b/openai/openai/src/lib.rs
index 93e6af0..5d1a86a 100644
--- a/openai/openai/src/lib.rs
+++ b/openai/openai/src/lib.rs
@@ -5,7 +5,7 @@ use kinode_process_lib::{
     println, Address, LazyLoadBlob, ProcessId, Request, Response,
 };
 use llm_interface::openai::{
-    ChatResponse, LLMRequest, LLMResponse, RegisterApiKeyRequest,
+    ChatResponse, ErrorResponse, LLMRequest, LLMResponse, RegisterApiKeyRequest, RegisterEndpointRequest,
 };
 use serde::Serialize;
 use std::{collections::HashMap, vec};
@@ -18,6 +18,8 @@ use helpers::*;
 
 pub const OPENAI_BASE_URL: &str = "https://api.openai.com/v1";
 pub const GROQ_BASE_URL: &str = "https://api.groq.com/openai/v1";
+const DEFAULT_TIMEOUT_SECONDS: u16 = 30;
+const OAI_PROVIDER_TIMEOUT_SECONDS: u16 = 60;
 
 wit_bindgen::generate!({
     path: "wit",
@@ -30,7 +32,7 @@ wit_bindgen::generate!({
 fn handle_response(context: &[u8]) -> anyhow::Result<()> {
     match context[0] {
         EMBEDDING_CONTEXT => handle_embedding_response()?,
-        OPENAI_CHAT_CONTEXT | GROQ_CHAT_CONTEXT | CHAT_IMAGE_CONTEXT => handle_chat_response()?,
+        OPENAI_CHAT_CONTEXT | GROQ_CHAT_CONTEXT | CHAT_IMAGE_CONTEXT | OAI_PROVIDER_CHAT_CONTEXT => handle_chat_response()?,
         _ => {}
     }
 
@@ -63,21 +65,40 @@ fn handle_request(body: &[u8], state: &mut Option<State>) -> anyhow::Result<()>
     match &request {
         LLMRequest::RegisterOpenaiApiKey(api_request) => register_openai_api_key(api_request, state),
         LLMRequest::RegisterGroqApiKey(api_request) => register_groq_api_key(api_request, state),
+        LLMRequest::RegisterOaiProviderEndpoint(endpoint_request) => register_oai_provider_endpoint(endpoint_request, state),
         LLMRequest::Embedding(embedding_request) => {
             let endpoint = format!("{}/embeddings", OPENAI_BASE_URL);
-            handle_generic_request(embedding_request, state, context, &endpoint)
+            handle_generic_request(embedding_request, state, context, &endpoint, None)
         }
         LLMRequest::OpenaiChat(chat_request) => {
             let endpoint = format!("{}/chat/completions", OPENAI_BASE_URL);
-            handle_generic_request(chat_request, state, context, &endpoint)
+            handle_generic_request(chat_request, state, context, &endpoint, None)
         }
         LLMRequest::GroqChat(chat_request) => {
             let endpoint = format!("{}/chat/completions", GROQ_BASE_URL);
-            handle_generic_request(chat_request, state, context, &endpoint)
+            handle_generic_request(chat_request, state, context, &endpoint, None)
         }
         LLMRequest::ChatImage(chat_image_request) => {
             let endpoint = format!("{}/chat/completions", OPENAI_BASE_URL);
-            handle_generic_request(chat_image_request, state, context, &endpoint)
+            handle_generic_request(chat_image_request, state, context, &endpoint, None)
+        }
+        LLMRequest::OaiProviderChat(chat_request) => {
+            let Some(s) = state else {
+                let err = "state must be set before calling OaiProviderChat";
+                Response::new().body(serde_json::to_vec(&LLMResponse::Err(
+                    ErrorResponse { error: err.to_string() }
+                ))?).send()?;
+                return Err(anyhow::anyhow!(err));
+            };
+            let Some(ref base_url) = s.oai_provider_base_url else {
+                let err = "oai_provider_base_url must be set before calling OaiProviderChat";
+                Response::new().body(serde_json::to_vec(&LLMResponse::Err(
+                    ErrorResponse { error: err.to_string() }
+                ))?).send()?;
+                return Err(anyhow::anyhow!(err));
+            };
+            let endpoint = format!("{}/chat/completions", base_url);
+            handle_generic_request(chat_request, state, context, &endpoint, Some(OAI_PROVIDER_TIMEOUT_SECONDS))
         }
     }
 }
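The dispatch above relies on two conventions worth spelling out: `LLMRequest` is an externally tagged serde enum (which is why the README JSON looks like `{"OaiProviderChat": {...}}`), and each variant is reduced to a one-byte context that rides on the outgoing `http_client` request so `handle_response` can route the reply. A self-contained sketch of that round trip, with simplified stand-ins for the real `llm_interface::openai` types (field names are assumptions based on the README examples):

```
use serde::{Deserialize, Serialize};

#[derive(Serialize, Deserialize)]
struct Message { role: String, content: String }

#[derive(Serialize, Deserialize)]
struct ChatRequest { model: String, messages: Vec<Message> }

#[derive(Serialize, Deserialize)]
enum LLMRequest {
    // Externally tagged (serde's default): {"OaiProviderChat": {...}}
    OaiProviderChat(ChatRequest),
}

const OAI_PROVIDER_CHAT_CONTEXT: u8 = 6;

fn main() -> anyhow::Result<()> {
    let raw = r#"{"OaiProviderChat":{"model":"","messages":[{"role":"user","content":"hi"}]}}"#;
    let request: LLMRequest = serde_json::from_str(raw)?;
    // The context byte is attached via .context(vec![context]) on the
    // outgoing HTTP request and matched again in handle_response(context[0]).
    let context = match &request {
        LLMRequest::OaiProviderChat(_) => OAI_PROVIDER_CHAT_CONTEXT,
    };
    assert_eq!(context, OAI_PROVIDER_CHAT_CONTEXT);
    Ok(())
}
```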
@@ -128,33 +149,59 @@
     Ok(())
 }
 
+fn register_oai_provider_endpoint(
+    endpoint_request: &RegisterEndpointRequest,
+    state: &mut Option<State>,
+) -> anyhow::Result<()> {
+    let endpoint = &endpoint_request.endpoint;
+    match state {
+        Some(_state) => {
+            _state.oai_provider_base_url = Some(endpoint.to_string());
+            _state.save();
+        }
+        None => {
+            let _state = State {
+                oai_provider_base_url: Some(endpoint.to_string()),
+                ..State::default()
+            };
+            _state.save();
+            *state = Some(_state);
+        }
+    }
+    let _ = Response::new().body(serde_json::to_vec(&LLMResponse::Ok)?).send();
+    Ok(())
+}
+
 fn handle_generic_request<T: Serialize>(
     request_data: &T,
     state: &mut Option<State>,
     context: u8,
     endpoint: &str,
+    timeout: Option<u16>,
 ) -> anyhow::Result<()> {
     let api_key = match context {
-        OPENAI_CHAT_CONTEXT | EMBEDDING_CONTEXT | CHAT_IMAGE_CONTEXT => state
+        OPENAI_CHAT_CONTEXT | EMBEDDING_CONTEXT | CHAT_IMAGE_CONTEXT => Some(state
             .as_ref()
             .ok_or_else(|| anyhow::anyhow!("State not initialized"))?
             .openai_api_key
-            .clone(),
-        GROQ_CHAT_CONTEXT => state
+            .clone()),
+        GROQ_CHAT_CONTEXT => Some(state
             .as_ref()
             .ok_or_else(|| anyhow::anyhow!("State not initialized"))?
             .groq_api_key
-            .clone(),
+            .clone()),
+        OAI_PROVIDER_CHAT_CONTEXT => None,
         _ => return Err(anyhow::anyhow!("Invalid context for API key")),
     };
+    let mut headers = vec![("Content-Type".to_string(), "application/json".to_string())];
+    if let Some(api_key) = api_key {
+        headers.push(("Authorization".to_string(), format!("Bearer {}", api_key)));
+    };
     let outgoing_request = OutgoingHttpRequest {
         method: "POST".to_string(),
         version: None,
         url: endpoint.to_string(),
-        headers: HashMap::from_iter(vec![
-            ("Content-Type".to_string(), "application/json".to_string()),
-            ("Authorization".to_string(), format!("Bearer {}", api_key)),
-        ]),
+        headers: HashMap::from_iter(headers),
     };
     let body = serde_json::to_vec(&HttpClientAction::Http(outgoing_request))?;
     let bytes = serialize_without_none(request_data)?;
@@ -164,7 +211,7 @@ fn handle_generic_request(
             ProcessId::new(Some("http_client"), "distro", "sys"),
         ))
         .body(body)
-        .expects_response(30)
+        .expects_response(timeout.unwrap_or_else(|| DEFAULT_TIMEOUT_SECONDS) as u64)
         .context(vec![context])
         .blob(LazyLoadBlob {
             mime: Some("application/json".to_string()),
diff --git a/openai/openai/src/structs.rs b/openai/openai/src/structs.rs
index 55d37a3..ee36ab1 100644
--- a/openai/openai/src/structs.rs
+++ b/openai/openai/src/structs.rs
@@ -5,6 +5,7 @@ use kinode_process_lib::{get_state, set_state};
 pub struct State {
     pub openai_api_key: String,
     pub groq_api_key: String,
+    pub oai_provider_base_url: Option<String>,
 }
 
 impl State {
@@ -21,4 +22,3 @@ impl State {
         set_state(&serialized_state);
     }
 }
-
diff --git a/openai/pkg/manifest.json b/openai/pkg/manifest.json
index 582269d..7ec9191 100644
--- a/openai/pkg/manifest.json
+++ b/openai/pkg/manifest.json
@@ -2,13 +2,15 @@
     {
         "process_name": "openai",
         "process_wasm_path": "/openai.wasm",
-        "on_exit": "None",
+        "on_exit": "None",
         "request_networking": true,
         "request_capabilities": [
-            "http_client:distro:sys"
+            "http_client:distro:sys",
+            "http_server:distro:sys"
         ],
         "grant_capabilities": [
-            "http_client:distro:sys"
+            "http_client:distro:sys",
+            "http_server:distro:sys"
         ],
         "public": true
     }
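One caveat the new `State` field introduces: `save` persists the whole struct, so state written by an older build has no `oai_provider_base_url`. A sketch of the usual backward-compatibility guard follows; the field names match this diff, but the derive setup is an assumption, since `State`'s actual (de)serialization format is not shown here:

```
use serde::{Deserialize, Serialize};

// Assumes a self-describing format such as JSON. With a positional
// format like bincode, old state simply fails to decode, and the
// process falls back to State::default(), as the None arm of
// register_oai_provider_endpoint already does.
#[derive(Serialize, Deserialize, Default)]
pub struct State {
    pub openai_api_key: String,
    pub groq_api_key: String,
    #[serde(default)] // explicit: missing in pre-upgrade state decodes as None
    pub oai_provider_base_url: Option<String>,
}
```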
diff --git a/openai/pkg/openai.wasm b/openai/pkg/openai.wasm
deleted file mode 100644
index bc592eb..0000000
Binary files a/openai/pkg/openai.wasm and /dev/null differ