2 changes: 2 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default.

11 changes: 11 additions & 0 deletions lib/bindings/python/Cargo.lock

Some generated files are not rendered by default.

5 changes: 4 additions & 1 deletion lib/llm/Cargo.toml
@@ -24,6 +24,7 @@ testing-etcd = []
block-manager = ["dep:nixl-sys", "dep:cudarc", "dep:nix", "dep:aligned-vec"]
cuda = ["dep:cudarc"]
integration = ["dynamo-runtime/integration"]
media-nixl = ["dep:nixl-sys", "dep:dynamo-memory"]

[[bench]]
name = "tokenizer"
@@ -33,9 +34,11 @@ harness = false
name = "transfer_context_v2"
harness = false
required-features = ["block-manager", "testing-cuda"]

[dependencies]
# repo
dynamo-runtime = { workspace = true }
dynamo-memory = { path = "../memory", version = "0.7.0", optional = true }

# workspace
aho-corasick = "1.1"
@@ -145,7 +148,7 @@ json-five = { version = "0.3" }
# media loading in the preprocessor
reqwest = { workspace = true }
base64 = { version = "0.22" }
image = { version = "0.25" }
image = { version = "0.25", features = ["serde"] }
tokio-rayon = {version = "2" }
ndarray = { version = "0.16" }

43 changes: 32 additions & 11 deletions lib/llm/src/preprocessor.rs
@@ -27,7 +27,8 @@ use std::{collections::HashMap, pin::Pin, sync::Arc};
use tracing;

use crate::model_card::{ModelDeploymentCard, ModelInfo};
use crate::preprocessor::media::MediaLoader;
#[cfg(feature = "media-nixl")]
use crate::preprocessor::media::{MediaDecoder, MediaFetcher, MediaLoader};
use crate::preprocessor::prompt::OAIChatLikeRequest;
use crate::protocols::common::preprocessor::{
MultimodalData, MultimodalDataMap, PreprocessedRequestBuilder,
@@ -114,6 +115,7 @@ pub struct OpenAIPreprocessor {
/// Per-model runtime configuration propagated to response generator (e.g., reasoning/tool parser)
runtime_config: crate::local_model::runtime_config::ModelRuntimeConfig,
tool_call_parser: Option<String>,
#[cfg(feature = "media-nixl")]
media_loader: Option<MediaLoader>,
}

@@ -143,14 +145,21 @@ impl OpenAIPreprocessor {

// Initialize runtime config from the ModelDeploymentCard
let runtime_config = mdc.runtime_config.clone();
let media_loader = None; // TODO: enable with decoder config from MDC

#[cfg(feature = "media-nixl")]
let media_loader = match mdc.media_decoder {
Some(media_decoder) => Some(MediaLoader::new(media_decoder, mdc.media_fetcher)?),
None => None,
};

Ok(Arc::new(Self {
formatter,
tokenizer,
model_info,
mdcsum,
runtime_config,
tool_call_parser,
#[cfg(feature = "media-nixl")]
media_loader,
}))
}
@@ -279,7 +288,9 @@ impl OpenAIPreprocessor {
let messages = request.messages();
let message_count = messages.len().unwrap_or(0);
let mut media_map: MultimodalDataMap = HashMap::new();
let mut fetch_tasks = Vec::new();
#[cfg(feature = "media-nixl")]
let mut fetch_tasks: Vec<(String, ChatCompletionRequestUserMessageContentPart)> =
Vec::new();

for idx in 0..message_count {
let msg = messages
@@ -312,29 +323,39 @@ impl OpenAIPreprocessor {
_ => continue,
};

#[cfg(feature = "media-nixl")]
if self.media_loader.is_some() {
fetch_tasks.push((type_str, content_part.clone()));
} else {
// No loader, just pass the URL through
media_map
.entry(type_str)
.or_default()
.push(MultimodalData::Url(url));
continue;
}

// Fallback: just pass the URL through
media_map
.entry(type_str)
.or_default()
.push(MultimodalData::Url(url));
}
}

// Execute all fetch tasks
#[cfg(feature = "media-nixl")]
if !fetch_tasks.is_empty() {
let loader = self.media_loader.as_ref().unwrap();
let _results = futures::future::join_all(
let results = futures::future::join_all(
fetch_tasks
.iter()
.map(|(_, content_part)| loader.fetch_and_decode_media_part(content_part)),
)
.await;

// TODO: decode and pass NIXL descriptors to the media map
for ((type_str, _), result) in fetch_tasks.into_iter().zip(results.into_iter()) {
// if one item fails, the whole request errors; other items will be cleaned up by Drop
let rdma_descriptor = result?;
media_map
.entry(type_str)
.or_default()
.push(MultimodalData::Decoded(rdma_descriptor));
}
}

if !media_map.is_empty() {
5 changes: 5 additions & 0 deletions lib/llm/src/preprocessor/media.rs
@@ -4,7 +4,12 @@
mod common;
mod decoders;
mod loader;
mod rdma;

pub use common::EncodedMediaData;
pub use decoders::{Decoder, ImageDecoder, MediaDecoder};
pub use loader::{MediaFetcher, MediaLoader};

pub use rdma::{DecodedMediaData, RdmaMediaDataDescriptor};
#[cfg(feature = "media-nixl")]
pub use rdma::{get_nixl_agent, get_nixl_metadata};
63 changes: 63 additions & 0 deletions lib/llm/src/preprocessor/media/README.md
@@ -0,0 +1,63 @@
# Media decoding in the frontend


This component performs media download, base64 decoding, media decoding, and NIXL registration. Today it is used in the OpenAI preprocessor to transform multimodal inputs (image_url, video_url, audio_url) into fully decoded data (pixel values, ...) accessible to the backends via NIXL.

## Usage

Media decoding is enabled when registering the MDC (ModelDeploymentCard):

Set HTTP download options:

```python
from dynamo.llm import MediaFetcher
fetcher = MediaFetcher()
fetcher.user_agent("dynamo")
fetcher.timeout_ms(15000)
fetcher.allow_direct_ip(True)
fetcher.allow_direct_port(False)
fetcher.allowed_media_domains(["google.com"])
```
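
As the field names suggest, these options appear to act as download guards: `timeout_ms` bounds each fetch, `allow_direct_ip`/`allow_direct_port` control whether URLs may target raw IP addresses or non-default ports, and `allowed_media_domains` restricts downloads to an allowlist of hostnames.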

Set media decoding options:

```python
from dynamo.llm import MediaDecoder
decoder = MediaDecoder()
decoder.image_decoder({"max_image_width": 4096, "max_image_height": 4096, "max_alloc": 16*1024*1024})
```

And register the LLM as usual, adding the media configuration:

```python
register_llm(
...,
media_decoder=decoder,
media_fetcher=fetcher,
)
```
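
Once registered, the decode path is exercised by ordinary OpenAI-style chat requests that carry media parts. A minimal sketch, assuming a hypothetical OpenAI-compatible frontend at `http://localhost:8000/v1` and a hypothetical model name `my-model`:

```python
from openai import OpenAI

# Hypothetical endpoint and model name; adjust to your deployment.
client = OpenAI(base_url="http://localhost:8000/v1", api_key="unused")

response = client.chat.completions.create(
    model="my-model",
    messages=[{
        "role": "user",
        "content": [
            {"type": "text", "text": "Describe this image."},
            # The frontend downloads and decodes this URL, then exposes the
            # pixel data to backends via NIXL. The host must pass the
            # allowed_media_domains allowlist configured above.
            {"type": "image_url", "image_url": {"url": "https://google.com/logo.png"}},
        ],
    }],
)
print(response.choices[0].message.content)
```

With the fetcher configured above, only `google.com` URLs would pass the domain allowlist; other hosts should be rejected before any download happens.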


## TODOs

### Modalities

- [x] Image decoding
- [ ] Video decoding
- [ ] Audio decoding

### Performance

- [x] Image SW decoding
- [ ] Video HW decoding (NVDEC)
- [ ] JPEG HW decoding (nvJPEG)
- [ ] Sparse video sampling (seek-forward)
- [ ] Memory slab pre-allocation/registration

### Memory management
- [ ] Memory spilling to lower storage tiers
- [ ] Early-free memory on client notifications

### Misc
- [ ] Observability of performance, memory usage, and input distributions
- [ ] Per-request decoding options
49 changes: 8 additions & 41 deletions lib/llm/src/preprocessor/media/decoders.rs
@@ -2,52 +2,14 @@
// SPDX-License-Identifier: Apache-2.0

use anyhow::Result;
use serde::{Deserialize, Serialize};

use super::common::EncodedMediaData;
use ndarray::{ArrayBase, Dimension, OwnedRepr};
mod image;
use super::rdma::DecodedMediaData;
pub mod image;

pub use image::{ImageDecoder, ImageMetadata};

#[derive(Debug)]
pub enum DecodedMediaMetadata {
#[allow(dead_code)] // used in followup MR
Image(ImageMetadata),
}

#[derive(Debug, PartialEq, Eq)]
pub enum DataType {
UINT8,
}

// Decoded media data (image RGB, video frames pixels, ...)
#[derive(Debug)]
pub struct DecodedMediaData {
#[allow(dead_code)] // used in followup MR
pub(crate) data: Vec<u8>,
#[allow(dead_code)] // used in followup MR
pub(crate) shape: Vec<usize>,
#[allow(dead_code)] // used in followup MR
pub(crate) dtype: DataType,
#[allow(dead_code)] // used in followup MR
pub(crate) metadata: Option<DecodedMediaMetadata>,
}

// convert Array{N}<u8> to DecodedMediaData
// TODO: Array1<f32> for audio
impl<D: Dimension> From<ArrayBase<OwnedRepr<u8>, D>> for DecodedMediaData {
fn from(array: ArrayBase<OwnedRepr<u8>, D>) -> Self {
let shape = array.shape().to_vec();
let (data, _) = array.into_raw_vec_and_offset();
Self {
data,
shape,
dtype: DataType::UINT8,
metadata: None,
}
}
}

#[async_trait::async_trait]
pub trait Decoder: Clone + Send + 'static {
fn decode(&self, data: EncodedMediaData) -> Result<DecodedMediaData>;
@@ -67,3 +29,8 @@ pub struct MediaDecoder {
pub image_decoder: ImageDecoder,
// TODO: video, audio decoders
}

#[derive(Serialize, Deserialize, Clone, Copy, Debug)]
pub enum DecodedMediaMetadata {
Image(ImageMetadata),
}