diff --git a/runner/app/live/pipelines/streamdiffusion.py b/runner/app/live/pipelines/streamdiffusion.py index aa0464293..b4dfe7077 100644 --- a/runner/app/live/pipelines/streamdiffusion.py +++ b/runner/app/live/pipelines/streamdiffusion.py @@ -6,13 +6,13 @@ import torch from pydantic import BaseModel, Field, model_validator from streamdiffusion import StreamDiffusionWrapper -from streamdiffusion.controlnet.preprocessors import list_preprocessors +# from streamdiffusion.controlnet.preprocessors import list_preprocessors from .interface import Pipeline from trickle import VideoFrame, VideoOutput from trickle import DEFAULT_WIDTH, DEFAULT_HEIGHT -AVAILABLE_PREPROCESSORS = list_preprocessors() +AVAILABLE_PREPROCESSORS = [] class ControlNetConfig(BaseModel): """ControlNet configuration model""" @@ -39,7 +39,8 @@ class Config: model_id: Literal[ "stabilityai/sd-turbo", "KBlueLeaf/kohaku-v2.1", - ] = "stabilityai/sd-turbo" + "stabilityai/sdxl-turbo", + ] = "stabilityai/sdxl-turbo" # Generation parameters prompt: str | List[Tuple[str, float]] = "an anime render of a girl with purple hair, masterpiece" @@ -77,54 +78,54 @@ class Config: # ControlNet settings controlnets: Optional[List[ControlNetConfig]] = [ - ControlNetConfig( - model_id="thibaud/controlnet-sd21-openpose-diffusers", - conditioning_scale=0.711, - preprocessor="pose_tensorrt", - preprocessor_params={}, - enabled=True, - control_guidance_start=0.0, - control_guidance_end=1.0, - ), - ControlNetConfig( - model_id="thibaud/controlnet-sd21-hed-diffusers", - conditioning_scale=0.2, - preprocessor="soft_edge", - preprocessor_params={}, - enabled=True, - control_guidance_start=0.0, - control_guidance_end=1.0, - ), - ControlNetConfig( - model_id="thibaud/controlnet-sd21-canny-diffusers", - conditioning_scale=0.2, - preprocessor="canny", - preprocessor_params={ - "low_threshold": 100, - "high_threshold": 200 - }, - enabled=True, - control_guidance_start=0.0, - control_guidance_end=1.0, - ), - ControlNetConfig( - model_id="thibaud/controlnet-sd21-depth-diffusers", - conditioning_scale=0.5, - preprocessor="depth_tensorrt", - preprocessor_params={}, - enabled=True, - control_guidance_start=0.0, - control_guidance_end=1.0, - ), - ControlNetConfig( - model_id="thibaud/controlnet-sd21-color-diffusers", - conditioning_scale=0.2, - preprocessor="passthrough", - preprocessor_params={}, - enabled=True, - control_guidance_start=0.0, - control_guidance_end=1.0, - ) + # ControlNetConfig( + # model_id="thibaud/controlnet-sd21-openpose-diffusers", + # conditioning_scale=0.711, + # preprocessor="pose_tensorrt", + # preprocessor_params={}, + # enabled=True, + # control_guidance_start=0.0, + # control_guidance_end=1.0, + # ), + # ControlNetConfig( + # model_id="thibaud/controlnet-sd21-hed-diffusers", + # conditioning_scale=0.2, + # preprocessor="soft_edge", + # preprocessor_params={}, + # enabled=True, + # control_guidance_start=0.0, + # control_guidance_end=1.0, + # ), + # ControlNetConfig( + # model_id="thibaud/controlnet-sd21-canny-diffusers", + # conditioning_scale=0.2, + # preprocessor="canny", + # preprocessor_params={ + # "low_threshold": 100, + # "high_threshold": 200 + # }, + # enabled=True, + # control_guidance_start=0.0, + # control_guidance_end=1.0, + # ), + # ControlNetConfig( + # model_id="thibaud/controlnet-sd21-depth-diffusers", + # conditioning_scale=0.5, + # preprocessor="depth_tensorrt", + # preprocessor_params={}, + # enabled=True, + # control_guidance_start=0.0, + # control_guidance_end=1.0, + # ), + # ControlNetConfig( + # model_id="thibaud/controlnet-sd21-color-diffusers", + # conditioning_scale=0.2, + # preprocessor="passthrough", + # preprocessor_params={}, + # enabled=True, + # control_guidance_start=0.0, + # control_guidance_end=1.0, + # ) ] @model_validator(mode="after") @@ -173,12 +174,12 @@ def process_tensor_sync(self, img_tensor: torch.Tensor): # The incoming frame.tensor is (B, H, W, C) in range [-1, 1] while the # VaeImageProcessor inside the wrapper expects (B, C, H, W) in [0, 1]. - img_tensor = img_tensor.permute(0, 3, 1, 2) - img_tensor = cast(torch.Tensor, self.pipe.stream.image_processor.denormalize(img_tensor)) - img_tensor = self.pipe.preprocess_image(img_tensor) + # img_tensor = img_tensor.permute(0, 3, 1, 2) + # img_tensor = cast(torch.Tensor, self.pipe.stream.image_processor.denormalize(img_tensor)) + # img_tensor = self.pipe.preprocess_image(img_tensor) - # Noop if ControlNets are not enabled - self.pipe.update_control_image_efficient(img_tensor) + if self.params and self.params.controlnets: + self.pipe.update_control_image_efficient(img_tensor) if self.first_frame: self.first_frame = False diff --git a/runner/app/live/streamer/process.py b/runner/app/live/streamer/process.py index 45ad3e4ae..0c1ca467e 100644 --- a/runner/app/live/streamer/process.py +++ b/runner/app/live/streamer/process.py @@ -14,6 +14,8 @@ from log import config_logging, config_logging_fields, log_timing from trickle import InputFrame, AudioFrame, VideoFrame, OutputFrame, VideoOutput, AudioOutput +from diffusers.image_processor import VaeImageProcessor + class PipelineProcess: @staticmethod def start(pipeline_name: str, params: dict): @@ -40,6 +42,8 @@ def __init__(self, pipeline_name: str): self.start_time = 0.0 self.request_id = "" + self.image_processor = VaeImageProcessor() + def is_alive(self): return self.process.is_alive() @@ -96,8 +100,15 @@ def reset_stream(self, request_id: str, manifest_id: str, stream_id: str): # TODO: Once audio is implemented, combined send_input with input_loop # We don't need additional queueing as comfystream already maintains a queue def send_input(self, frame: InputFrame): - if isinstance(frame, VideoFrame) and not frame.tensor.is_cuda and torch.cuda.is_available(): - frame = frame.replace_tensor(frame.tensor.cuda()) + if isinstance(frame, VideoFrame): + img_tensor = frame.tensor + if not img_tensor.is_cuda and torch.cuda.is_available(): + img_tensor = img_tensor.cuda() + img_tensor = img_tensor.permute(0, 3, 1, 2) + img_tensor = self.image_processor.denormalize(img_tensor) + # img_tensor = self.image_processor.preprocess(img_tensor) + frame = frame.replace_tensor(img_tensor) + self._try_queue_put(self.input_queue, frame) async def recv_output(self) -> OutputFrame | None: @@ -170,6 +181,7 @@ async def _initialize_pipeline(self): return pipeline except Exception as e: self._report_error(f"Error loading pipeline: {e}") + logging.exception(e) if not params: # Already tried loading with default params raise @@ -182,6 +194,7 @@ async def _initialize_pipeline(self): return pipeline except Exception as e: self._report_error(f"Error loading pipeline with default params: {e}") + logging.exception(e) raise async def _run_pipeline_loops(self): diff --git a/runner/app/live/trickle/decoder.py b/runner/app/live/trickle/decoder.py index d240bdf96..d9b5796c4 100644 --- a/runner/app/live/trickle/decoder.py +++ b/runner/app/live/trickle/decoder.py @@ -9,7 +9,7 @@ from .frame import InputFrame -MAX_FRAMERATE=24 +MAX_FRAMERATE=120 def decode_av(pipe_input, frame_callback, put_metadata, target_width, target_height): """ diff --git a/runner/app/tools/streamdiffusion/build_tensorrt_internal.sh b/runner/app/tools/streamdiffusion/build_tensorrt_internal.sh index 5773a979b..a0fc75372 100755 --- a/runner/app/tools/streamdiffusion/build_tensorrt_internal.sh +++ b/runner/app/tools/streamdiffusion/build_tensorrt_internal.sh @@ -10,7 +10,7 @@ set -e CONDA_PYTHON="/workspace/miniconda3/envs/comfystream/bin/python" MODELS="stabilityai/sd-turbo KBlueLeaf/kohaku-v2.1" TIMESTEPS="3 4" # This is basically the supported sizes for the t_index_list -DIMENSIONS="512x512" # Engines are now compiled for the 384-1024 range, but keep this in case it's useful in the future +DIMENSIONS="1024x1024" # Engines are now compiled for the 384-1024 range, but keep this in case it's useful in the future CONTROLNETS="" # Default empty, will be set from command line # Function to display help diff --git a/runner/dl_checkpoints.sh b/runner/dl_checkpoints.sh index b6eaec439..c421cac65 100755 --- a/runner/dl_checkpoints.sh +++ b/runner/dl_checkpoints.sh @@ -131,6 +131,7 @@ function download_streamdiffusion_live_models() { # StreamDiffusion huggingface-cli download KBlueLeaf/kohaku-v2.1 --include "*.safetensors" "*.json" "*.txt" --exclude ".onnx" ".onnx_data" --cache-dir models huggingface-cli download stabilityai/sd-turbo --include "*.safetensors" "*.json" "*.txt" --exclude ".onnx" ".onnx_data" --cache-dir models + huggingface-cli download stabilityai/sdxl-turbo --include "*.safetensors" "*.json" "*.txt" --exclude ".onnx" ".onnx_data" --cache-dir models # ControlNet models huggingface-cli download thibaud/controlnet-sd21-openpose-diffusers --include "*.bin" "*.json" "*.txt" --exclude ".onnx" ".onnx_data" --cache-dir models @@ -213,9 +214,9 @@ function build_streamdiffusion_tensorrt() { docker run --rm -v ./models:/models --gpus all -l TensorRT-engines $AI_RUNNER_STREAMDIFFUSION_IMAGE \ bash -c "./app/tools/streamdiffusion/build_tensorrt_internal.sh \ - --models 'stabilityai/sd-turbo KBlueLeaf/kohaku-v2.1' \ - --timesteps '1 2 3 4' \ - --controlnets 'thibaud/controlnet-sd21-openpose-diffusers thibaud/controlnet-sd21-hed-diffusers thibaud/controlnet-sd21-canny-diffusers thibaud/controlnet-sd21-depth-diffusers thibaud/controlnet-sd21-color-diffusers' \ + --models 'stabilityai/sdxl-turbo' \ + --timesteps '1 2 3' \ + --controlnets '' \ --build-depth-anything \ --build-pose \ && \ diff --git a/runner/docker/Dockerfile.live-base-streamdiffusion b/runner/docker/Dockerfile.live-base-streamdiffusion index 83cfc86e4..619e2e7f3 100644 --- a/runner/docker/Dockerfile.live-base-streamdiffusion +++ b/runner/docker/Dockerfile.live-base-streamdiffusion @@ -29,8 +29,8 @@ RUN conda run -n comfystream pip install --no-cache-dir --force-reinstall \ conda run -n comfystream pip install --no-cache-dir \ xformers==0.0.30 --no-deps -# Install StreamDiffusion @ v0.0.1-cnet.4 into the comfystream environment -RUN conda run -n comfystream pip install git+https://github.com/livepeer/StreamDiffusion.git@v0.0.1-cnet.4#egg=streamdiffusion[tensorrt] +# Install StreamDiffusion @ 902036d into the comfystream environment +RUN conda run -n comfystream pip install git+https://github.com/livepeer/StreamDiffusion.git@902036df74ad4a63b2d179a0e6dcc955e6f54c98#egg=streamdiffusion[tensorrt] # Pin versions of ONNX runtime which are too loose on streamdiffusion setup.py RUN conda run -n comfystream pip install --no-cache-dir \