From 3b1588948e5686b28b83bb3500356117d852876d Mon Sep 17 00:00:00 2001 From: Alexandre Milesi Date: Tue, 4 Nov 2025 10:00:46 -0800 Subject: [PATCH 1/3] feat: MDC decoding params, python bindings Signed-off-by: Alexandre Milesi --- lib/bindings/python/rust/lib.rs | 11 +++- lib/bindings/python/rust/llm/preprocessor.rs | 61 +++++++++++++++++++ .../python/src/dynamo/llm/__init__.py | 2 + lib/llm/src/local_model.rs | 21 +++++++ lib/llm/src/model_card.rs | 11 ++++ lib/llm/src/preprocessor/media.rs | 2 +- 6 files changed, 105 insertions(+), 3 deletions(-) diff --git a/lib/bindings/python/rust/lib.rs b/lib/bindings/python/rust/lib.rs index fc905b45d1..7de08e6360 100644 --- a/lib/bindings/python/rust/lib.rs +++ b/lib/bindings/python/rust/lib.rs @@ -37,6 +37,7 @@ use dynamo_llm::{self as llm_rs}; use dynamo_llm::{entrypoint::RouterConfig, kv_router::KvRouterConfig}; use crate::llm::local_model::ModelRuntimeConfig; +use crate::llm::preprocessor::{MediaDecoder, MediaFetcher}; #[pyclass(eq, eq_int)] #[derive(Clone, Debug, PartialEq)] @@ -159,6 +160,8 @@ fn _core(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_class::()?; m.add_class::()?; m.add_class::()?; + m.add_class::()?; + m.add_class::()?; m.add_class::()?; m.add_class::()?; m.add_class::()?; @@ -215,7 +218,7 @@ fn log_message(level: &str, message: &str, module: &str, file: &str, line: u32) /// Create an engine and attach it to an endpoint to make it visible to the frontend. /// This is the main way you create a Dynamo worker / backend. #[pyfunction] -#[pyo3(signature = (model_input, model_type, endpoint, model_path, model_name=None, context_length=None, kv_cache_block_size=None, router_mode=None, migration_limit=0, runtime_config=None, user_data=None, custom_template_path=None))] +#[pyo3(signature = (model_input, model_type, endpoint, model_path, model_name=None, context_length=None, kv_cache_block_size=None, router_mode=None, migration_limit=0, runtime_config=None, user_data=None, custom_template_path=None, media_decoder=None, media_fetcher=None))] #[allow(clippy::too_many_arguments)] fn register_llm<'p>( py: Python<'p>, @@ -231,6 +234,8 @@ fn register_llm<'p>( runtime_config: Option, user_data: Option<&Bound<'p, PyDict>>, custom_template_path: Option<&str>, + media_decoder: Option, + media_fetcher: Option, ) -> PyResult> { // Validate Prefill model type requirements if model_type.inner == llm_rs::model_type::ModelType::Prefill { @@ -303,7 +308,9 @@ fn register_llm<'p>( .migration_limit(Some(migration_limit)) .runtime_config(runtime_config.unwrap_or_default().inner) .user_data(user_data_json) - .custom_template_path(custom_template_path_owned); + .custom_template_path(custom_template_path_owned) + .media_decoder(media_decoder.map(|m| m.inner)) + .media_fetcher(media_fetcher.map(|m| m.inner)); // Load the ModelDeploymentCard let mut local_model = builder.build().await.map_err(to_pyerr)?; // Advertise ourself on etcd so ingress can find us diff --git a/lib/bindings/python/rust/llm/preprocessor.rs b/lib/bindings/python/rust/llm/preprocessor.rs index 0504635377..8cc074e056 100644 --- a/lib/bindings/python/rust/llm/preprocessor.rs +++ b/lib/bindings/python/rust/llm/preprocessor.rs @@ -6,6 +6,7 @@ use crate::llm::model_card::ModelDeploymentCard; use llm_rs::{ preprocessor::OpenAIPreprocessor, + preprocessor::media::{MediaDecoder as RsMediaDecoder, MediaFetcher as RsMediaFetcher}, protocols::common::llm_backend::{BackendOutput, PreprocessedRequest}, types::{ Annotated, @@ -74,3 +75,63 @@ impl OAIChatPreprocessor { }) } } + + +#[pyclass] +#[derive(Clone)] +pub struct MediaDecoder { + pub(crate) inner: RsMediaDecoder, +} + +#[pymethods] +impl MediaDecoder { + #[new] + fn new() -> Self { + Self { + inner: RsMediaDecoder::default(), + } + } + + fn image_decoder(&mut self, image_decoder: &Bound<'_, PyDict>) -> PyResult<()> { + let image_decoder = pythonize::depythonize(image_decoder).map_err(|err| { + PyErr::new::(format!("Failed to parse image_decoder: {}", err)) + })?; + self.inner.image_decoder = image_decoder; + Ok(()) + } +} + +#[pyclass] +#[derive(Clone)] +pub struct MediaFetcher { + pub(crate) inner: RsMediaFetcher, +} + +#[pymethods] +impl MediaFetcher { + #[new] + fn new() -> Self { + Self { + inner: RsMediaFetcher::default(), + } + } + fn user_agent(&mut self, user_agent: String) { + self.inner.user_agent = user_agent; + } + + fn allow_direct_ip(&mut self, allow: bool) { + self.inner.allow_direct_ip = allow; + } + + fn allow_direct_port(&mut self, allow: bool) { + self.inner.allow_direct_port = allow; + } + + fn allowed_media_domains(&mut self, domains: Vec) { + self.inner.allowed_media_domains = Some(domains.into_iter().collect()); + } + + fn timeout_ms(&mut self, timeout_ms: u64) { + self.inner.timeout = Some(Duration::from_millis(timeout_ms)); + } +} diff --git a/lib/bindings/python/src/dynamo/llm/__init__.py b/lib/bindings/python/src/dynamo/llm/__init__.py index 8c406ba6bc..480e762ef6 100644 --- a/lib/bindings/python/src/dynamo/llm/__init__.py +++ b/lib/bindings/python/src/dynamo/llm/__init__.py @@ -28,6 +28,8 @@ from dynamo._core import RouterMode as RouterMode from dynamo._core import SpecDecodeStats as SpecDecodeStats from dynamo._core import WorkerMetricsPublisher as WorkerMetricsPublisher +from dynamo._core import MediaDecoder as MediaDecoder +from dynamo._core import MediaFetcher as MediaFetcher from dynamo._core import WorkerStats as WorkerStats from dynamo._core import ZmqKvEventListener as ZmqKvEventListener from dynamo._core import ZmqKvEventPublisher as ZmqKvEventPublisher diff --git a/lib/llm/src/local_model.rs b/lib/llm/src/local_model.rs index de869047c5..8c7604c1dc 100644 --- a/lib/llm/src/local_model.rs +++ b/lib/llm/src/local_model.rs @@ -15,6 +15,7 @@ use crate::mocker::protocols::MockEngineArgs; use crate::model_card::{self, ModelDeploymentCard}; use crate::model_type::{ModelInput, ModelType}; use crate::request_template::RequestTemplate; +use crate::preprocessor::media::{MediaDecoder, MediaFetcher}; pub mod runtime_config; @@ -52,6 +53,8 @@ pub struct LocalModelBuilder { namespace: Option, custom_backend_metrics_endpoint: Option, custom_backend_metrics_polling_interval: Option, + media_decoder: Option, + media_fetcher: Option, } impl Default for LocalModelBuilder { @@ -77,6 +80,8 @@ impl Default for LocalModelBuilder { namespace: Default::default(), custom_backend_metrics_endpoint: Default::default(), custom_backend_metrics_polling_interval: Default::default(), + media_decoder: Default::default(), + media_fetcher: Default::default(), } } } @@ -184,6 +189,16 @@ impl LocalModelBuilder { self } + pub fn media_decoder(&mut self, media_decoder: Option) -> &mut Self { + self.media_decoder = media_decoder; + self + } + + pub fn media_fetcher(&mut self, media_fetcher: Option) -> &mut Self { + self.media_fetcher = media_fetcher; + self + } + /// Make an LLM ready for use: /// - Download it from Hugging Face (and NGC in future) if necessary /// - Resolve the path @@ -219,6 +234,8 @@ impl LocalModelBuilder { self.runtime_config.max_num_batched_tokens = mocker_engine_args.max_num_batched_tokens.map(|v| v as u64); self.runtime_config.data_parallel_size = mocker_engine_args.dp_size; + self.media_decoder = Some(MediaDecoder::default()); + self.media_fetcher = Some(MediaFetcher::default()); } // frontend and echo engine don't need a path. @@ -230,6 +247,8 @@ impl LocalModelBuilder { card.migration_limit = self.migration_limit; card.user_data = self.user_data.take(); card.runtime_config = self.runtime_config.clone(); + card.media_decoder = self.media_decoder.clone(); + card.media_fetcher = self.media_fetcher.clone(); return Ok(LocalModel { card, @@ -280,6 +299,8 @@ impl LocalModelBuilder { card.migration_limit = self.migration_limit; card.user_data = self.user_data.take(); card.runtime_config = self.runtime_config.clone(); + card.media_decoder = self.media_decoder.clone(); + card.media_fetcher = self.media_fetcher.clone(); Ok(LocalModel { card, diff --git a/lib/llm/src/model_card.rs b/lib/llm/src/model_card.rs index f19c938cd2..0674e1c0a1 100644 --- a/lib/llm/src/model_card.rs +++ b/lib/llm/src/model_card.rs @@ -25,6 +25,7 @@ use dynamo_runtime::{slug::Slug, storage::key_value_store::Versioned}; use serde::{Deserialize, Serialize}; use tokenizers::Tokenizer as HfTokenizer; +use crate::preprocessor::media::{MediaDecoder, MediaFetcher}; use crate::protocols::TokenIdType; /// Identify model deployment cards in the key-value store @@ -217,6 +218,14 @@ pub struct ModelDeploymentCard { #[serde(default)] pub runtime_config: ModelRuntimeConfig, + /// Media decoding configuration + #[serde(default)] + pub media_decoder: Option, + + /// Media fetching configuration + #[serde(default)] + pub media_fetcher: Option, + #[serde(skip, default)] checksum: OnceLock, } @@ -520,6 +529,8 @@ impl ModelDeploymentCard { model_input: Default::default(), // set later user_data: None, runtime_config: ModelRuntimeConfig::default(), + media_decoder: None, + media_fetcher: None, checksum: OnceLock::new(), }) } diff --git a/lib/llm/src/preprocessor/media.rs b/lib/llm/src/preprocessor/media.rs index 5104af8e21..f4faa8e8f3 100644 --- a/lib/llm/src/preprocessor/media.rs +++ b/lib/llm/src/preprocessor/media.rs @@ -7,4 +7,4 @@ mod loader; pub use common::EncodedMediaData; pub use decoders::{Decoder, ImageDecoder, MediaDecoder}; -pub use loader::MediaLoader; +pub use loader::{MediaLoader, MediaFetcher}; From 83b099e66cd88f39661e5110d2a0c678e21be947 Mon Sep 17 00:00:00 2001 From: Alexandre Milesi Date: Wed, 5 Nov 2025 09:51:29 -0800 Subject: [PATCH 2/3] style: isort Signed-off-by: Alexandre Milesi --- lib/bindings/python/rust/llm/preprocessor.rs | 1 + lib/bindings/python/src/dynamo/llm/__init__.py | 4 ++-- lib/llm/src/local_model.rs | 2 +- lib/llm/src/preprocessor/media.rs | 2 +- 4 files changed, 5 insertions(+), 4 deletions(-) diff --git a/lib/bindings/python/rust/llm/preprocessor.rs b/lib/bindings/python/rust/llm/preprocessor.rs index 8cc074e056..d5a845825f 100644 --- a/lib/bindings/python/rust/llm/preprocessor.rs +++ b/lib/bindings/python/rust/llm/preprocessor.rs @@ -3,6 +3,7 @@ use super::*; use crate::llm::model_card::ModelDeploymentCard; +use std::time::Duration; use llm_rs::{ preprocessor::OpenAIPreprocessor, diff --git a/lib/bindings/python/src/dynamo/llm/__init__.py b/lib/bindings/python/src/dynamo/llm/__init__.py index 480e762ef6..3d812b311e 100644 --- a/lib/bindings/python/src/dynamo/llm/__init__.py +++ b/lib/bindings/python/src/dynamo/llm/__init__.py @@ -18,6 +18,8 @@ from dynamo._core import KvRecorder as KvRecorder from dynamo._core import KvRouterConfig as KvRouterConfig from dynamo._core import KvStats as KvStats +from dynamo._core import MediaDecoder as MediaDecoder +from dynamo._core import MediaFetcher as MediaFetcher from dynamo._core import ModelInput as ModelInput from dynamo._core import ModelRuntimeConfig as ModelRuntimeConfig from dynamo._core import ModelType as ModelType @@ -28,8 +30,6 @@ from dynamo._core import RouterMode as RouterMode from dynamo._core import SpecDecodeStats as SpecDecodeStats from dynamo._core import WorkerMetricsPublisher as WorkerMetricsPublisher -from dynamo._core import MediaDecoder as MediaDecoder -from dynamo._core import MediaFetcher as MediaFetcher from dynamo._core import WorkerStats as WorkerStats from dynamo._core import ZmqKvEventListener as ZmqKvEventListener from dynamo._core import ZmqKvEventPublisher as ZmqKvEventPublisher diff --git a/lib/llm/src/local_model.rs b/lib/llm/src/local_model.rs index 8c7604c1dc..5c734abbd2 100644 --- a/lib/llm/src/local_model.rs +++ b/lib/llm/src/local_model.rs @@ -14,8 +14,8 @@ use crate::entrypoint::RouterConfig; use crate::mocker::protocols::MockEngineArgs; use crate::model_card::{self, ModelDeploymentCard}; use crate::model_type::{ModelInput, ModelType}; -use crate::request_template::RequestTemplate; use crate::preprocessor::media::{MediaDecoder, MediaFetcher}; +use crate::request_template::RequestTemplate; pub mod runtime_config; diff --git a/lib/llm/src/preprocessor/media.rs b/lib/llm/src/preprocessor/media.rs index f4faa8e8f3..0c0e3e6b12 100644 --- a/lib/llm/src/preprocessor/media.rs +++ b/lib/llm/src/preprocessor/media.rs @@ -7,4 +7,4 @@ mod loader; pub use common::EncodedMediaData; pub use decoders::{Decoder, ImageDecoder, MediaDecoder}; -pub use loader::{MediaLoader, MediaFetcher}; +pub use loader::{MediaFetcher, MediaLoader}; From cdf7d290e6c697a99ce71e17e2d6cfe92e7e1a50 Mon Sep 17 00:00:00 2001 From: Alexandre Milesi Date: Thu, 6 Nov 2025 09:40:21 -0800 Subject: [PATCH 3/3] style: lint Signed-off-by: Alexandre Milesi --- lib/bindings/python/rust/llm/preprocessor.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/bindings/python/rust/llm/preprocessor.rs b/lib/bindings/python/rust/llm/preprocessor.rs index d5a845825f..0fc8686f60 100644 --- a/lib/bindings/python/rust/llm/preprocessor.rs +++ b/lib/bindings/python/rust/llm/preprocessor.rs @@ -77,7 +77,6 @@ impl OAIChatPreprocessor { } } - #[pyclass] #[derive(Clone)] pub struct MediaDecoder {