Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
113 changes: 57 additions & 56 deletions runner/app/live/pipelines/streamdiffusion.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,13 @@
import torch
from pydantic import BaseModel, Field, model_validator
from streamdiffusion import StreamDiffusionWrapper
from streamdiffusion.controlnet.preprocessors import list_preprocessors
# from streamdiffusion.controlnet.preprocessors import list_preprocessors

from .interface import Pipeline
from trickle import VideoFrame, VideoOutput
from trickle import DEFAULT_WIDTH, DEFAULT_HEIGHT

AVAILABLE_PREPROCESSORS = list_preprocessors()
AVAILABLE_PREPROCESSORS = []

class ControlNetConfig(BaseModel):
"""ControlNet configuration model"""
Expand All @@ -39,7 +39,8 @@ class Config:
model_id: Literal[
"stabilityai/sd-turbo",
"KBlueLeaf/kohaku-v2.1",
] = "stabilityai/sd-turbo"
"stabilityai/sdxl-turbo",
] = "stabilityai/sdxl-turbo"

# Generation parameters
prompt: str | List[Tuple[str, float]] = "an anime render of a girl with purple hair, masterpiece"
Expand Down Expand Up @@ -77,54 +78,54 @@ class Config:

# ControlNet settings
controlnets: Optional[List[ControlNetConfig]] = [
ControlNetConfig(
model_id="thibaud/controlnet-sd21-openpose-diffusers",
conditioning_scale=0.711,
preprocessor="pose_tensorrt",
preprocessor_params={},
enabled=True,
control_guidance_start=0.0,
control_guidance_end=1.0,
),
ControlNetConfig(
model_id="thibaud/controlnet-sd21-hed-diffusers",
conditioning_scale=0.2,
preprocessor="soft_edge",
preprocessor_params={},
enabled=True,
control_guidance_start=0.0,
control_guidance_end=1.0,
),
ControlNetConfig(
model_id="thibaud/controlnet-sd21-canny-diffusers",
conditioning_scale=0.2,
preprocessor="canny",
preprocessor_params={
"low_threshold": 100,
"high_threshold": 200
},
enabled=True,
control_guidance_start=0.0,
control_guidance_end=1.0,
),
ControlNetConfig(
model_id="thibaud/controlnet-sd21-depth-diffusers",
conditioning_scale=0.5,
preprocessor="depth_tensorrt",
preprocessor_params={},
enabled=True,
control_guidance_start=0.0,
control_guidance_end=1.0,
),
ControlNetConfig(
model_id="thibaud/controlnet-sd21-color-diffusers",
conditioning_scale=0.2,
preprocessor="passthrough",
preprocessor_params={},
enabled=True,
control_guidance_start=0.0,
control_guidance_end=1.0,
)
# ControlNetConfig(
# model_id="thibaud/controlnet-sd21-openpose-diffusers",
# conditioning_scale=0.711,
# preprocessor="pose_tensorrt",
# preprocessor_params={},
# enabled=True,
# control_guidance_start=0.0,
# control_guidance_end=1.0,
# ),
# ControlNetConfig(
# model_id="thibaud/controlnet-sd21-hed-diffusers",
# conditioning_scale=0.2,
# preprocessor="soft_edge",
# preprocessor_params={},
# enabled=True,
# control_guidance_start=0.0,
# control_guidance_end=1.0,
# ),
# ControlNetConfig(
# model_id="thibaud/controlnet-sd21-canny-diffusers",
# conditioning_scale=0.2,
# preprocessor="canny",
# preprocessor_params={
# "low_threshold": 100,
# "high_threshold": 200
# },
# enabled=True,
# control_guidance_start=0.0,
# control_guidance_end=1.0,
# ),
# ControlNetConfig(
# model_id="thibaud/controlnet-sd21-depth-diffusers",
# conditioning_scale=0.5,
# preprocessor="depth_tensorrt",
# preprocessor_params={},
# enabled=True,
# control_guidance_start=0.0,
# control_guidance_end=1.0,
# ),
# ControlNetConfig(
# model_id="thibaud/controlnet-sd21-color-diffusers",
# conditioning_scale=0.2,
# preprocessor="passthrough",
# preprocessor_params={},
# enabled=True,
# control_guidance_start=0.0,
# control_guidance_end=1.0,
# )
]

@model_validator(mode="after")
Expand Down Expand Up @@ -173,12 +174,12 @@ def process_tensor_sync(self, img_tensor: torch.Tensor):

# The incoming frame.tensor is (B, H, W, C) in range [-1, 1] while the
# VaeImageProcessor inside the wrapper expects (B, C, H, W) in [0, 1].
img_tensor = img_tensor.permute(0, 3, 1, 2)
img_tensor = cast(torch.Tensor, self.pipe.stream.image_processor.denormalize(img_tensor))
img_tensor = self.pipe.preprocess_image(img_tensor)
# img_tensor = img_tensor.permute(0, 3, 1, 2)
# img_tensor = cast(torch.Tensor, self.pipe.stream.image_processor.denormalize(img_tensor))
# img_tensor = self.pipe.preprocess_image(img_tensor)

# Noop if ControlNets are not enabled
self.pipe.update_control_image_efficient(img_tensor)
if self.params and self.params.controlnets:
self.pipe.update_control_image_efficient(img_tensor)

if self.first_frame:
self.first_frame = False
Expand Down
17 changes: 15 additions & 2 deletions runner/app/live/streamer/process.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
from log import config_logging, config_logging_fields, log_timing
from trickle import InputFrame, AudioFrame, VideoFrame, OutputFrame, VideoOutput, AudioOutput

from diffusers.image_processor import VaeImageProcessor

class PipelineProcess:
@staticmethod
def start(pipeline_name: str, params: dict):
Expand All @@ -40,6 +42,8 @@ def __init__(self, pipeline_name: str):
self.start_time = 0.0
self.request_id = ""

self.image_processor = VaeImageProcessor()

def is_alive(self):
return self.process.is_alive()

Expand Down Expand Up @@ -96,8 +100,15 @@ def reset_stream(self, request_id: str, manifest_id: str, stream_id: str):
# TODO: Once audio is implemented, combine send_input with input_loop
# We don't need additional queueing as comfystream already maintains a queue
def send_input(self, frame: InputFrame):
if isinstance(frame, VideoFrame) and not frame.tensor.is_cuda and torch.cuda.is_available():
frame = frame.replace_tensor(frame.tensor.cuda())
if isinstance(frame, VideoFrame):
img_tensor = frame.tensor
if not img_tensor.is_cuda and torch.cuda.is_available():
img_tensor = img_tensor.cuda()
img_tensor = img_tensor.permute(0, 3, 1, 2)
img_tensor = self.image_processor.denormalize(img_tensor)
# img_tensor = self.image_processor.preprocess(img_tensor)
frame = frame.replace_tensor(img_tensor)

self._try_queue_put(self.input_queue, frame)

async def recv_output(self) -> OutputFrame | None:
Expand Down Expand Up @@ -170,6 +181,7 @@ async def _initialize_pipeline(self):
return pipeline
except Exception as e:
self._report_error(f"Error loading pipeline: {e}")
logging.exception(e)
if not params:
# Already tried loading with default params
raise
Expand All @@ -182,6 +194,7 @@ async def _initialize_pipeline(self):
return pipeline
except Exception as e:
self._report_error(f"Error loading pipeline with default params: {e}")
logging.exception(e)
raise

async def _run_pipeline_loops(self):
Expand Down
2 changes: 1 addition & 1 deletion runner/app/live/trickle/decoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

from .frame import InputFrame

MAX_FRAMERATE=24
MAX_FRAMERATE=120

def decode_av(pipe_input, frame_callback, put_metadata, target_width, target_height):
"""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ set -e
CONDA_PYTHON="/workspace/miniconda3/envs/comfystream/bin/python"
MODELS="stabilityai/sd-turbo KBlueLeaf/kohaku-v2.1"
TIMESTEPS="3 4" # These are basically the supported sizes for the t_index_list
DIMENSIONS="512x512" # Engines are now compiled for the 384-1024 range, but keep this in case it's useful in the future
DIMENSIONS="1024x1024" # Engines are now compiled for the 384-1024 range, but keep this in case it's useful in the future
CONTROLNETS="" # Default empty, will be set from command line

# Function to display help
Expand Down
7 changes: 4 additions & 3 deletions runner/dl_checkpoints.sh
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,7 @@ function download_streamdiffusion_live_models() {
# StreamDiffusion
huggingface-cli download KBlueLeaf/kohaku-v2.1 --include "*.safetensors" "*.json" "*.txt" --exclude ".onnx" ".onnx_data" --cache-dir models
huggingface-cli download stabilityai/sd-turbo --include "*.safetensors" "*.json" "*.txt" --exclude ".onnx" ".onnx_data" --cache-dir models
huggingface-cli download stabilityai/sdxl-turbo --include "*.safetensors" "*.json" "*.txt" --exclude ".onnx" ".onnx_data" --cache-dir models

# ControlNet models
huggingface-cli download thibaud/controlnet-sd21-openpose-diffusers --include "*.bin" "*.json" "*.txt" --exclude ".onnx" ".onnx_data" --cache-dir models
Expand Down Expand Up @@ -213,9 +214,9 @@ function build_streamdiffusion_tensorrt() {

docker run --rm -v ./models:/models --gpus all -l TensorRT-engines $AI_RUNNER_STREAMDIFFUSION_IMAGE \
bash -c "./app/tools/streamdiffusion/build_tensorrt_internal.sh \
--models 'stabilityai/sd-turbo KBlueLeaf/kohaku-v2.1' \
--timesteps '1 2 3 4' \
--controlnets 'thibaud/controlnet-sd21-openpose-diffusers thibaud/controlnet-sd21-hed-diffusers thibaud/controlnet-sd21-canny-diffusers thibaud/controlnet-sd21-depth-diffusers thibaud/controlnet-sd21-color-diffusers' \
--models 'stabilityai/sdxl-turbo' \
--timesteps '1 2 3' \
--controlnets '' \
--build-depth-anything \
--build-pose \
&& \
Expand Down
4 changes: 2 additions & 2 deletions runner/docker/Dockerfile.live-base-streamdiffusion
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,8 @@ RUN conda run -n comfystream pip install --no-cache-dir --force-reinstall \
conda run -n comfystream pip install --no-cache-dir \
xformers==0.0.30 --no-deps

# Install StreamDiffusion @ v0.0.1-cnet.4 into the comfystream environment
RUN conda run -n comfystream pip install git+https://github.com/livepeer/StreamDiffusion.git@v0.0.1-cnet.4#egg=streamdiffusion[tensorrt]
# Install StreamDiffusion @ 902036d into the comfystream environment
RUN conda run -n comfystream pip install git+https://github.com/livepeer/StreamDiffusion.git@902036df74ad4a63b2d179a0e6dcc955e6f54c98#egg=streamdiffusion[tensorrt]

# Pin the ONNX runtime versions, which are too loosely constrained in streamdiffusion's setup.py
RUN conda run -n comfystream pip install --no-cache-dir \
Expand Down