From f2726b5fadfff55a38f1ae75ab21405336991d29 Mon Sep 17 00:00:00 2001
From: Victor Elias <victor@livepeer.org>
Date: Fri, 1 Aug 2025 21:06:48 +0000
Subject: [PATCH 1/7] Remove controlnets to test sdturbo perf (50FPS)

---
 runner/app/live/pipelines/streamdiffusion.py | 100 +++++++++----------
 runner/app/live/trickle/decoder.py           |   2 +-
 2 files changed, 51 insertions(+), 51 deletions(-)

diff --git a/runner/app/live/pipelines/streamdiffusion.py b/runner/app/live/pipelines/streamdiffusion.py
index aa0464293..195e818e5 100644
--- a/runner/app/live/pipelines/streamdiffusion.py
+++ b/runner/app/live/pipelines/streamdiffusion.py
@@ -77,54 +77,54 @@ class Config:
 
     # ControlNet settings
     controlnets: Optional[List[ControlNetConfig]] = [
-        ControlNetConfig(
-            model_id="thibaud/controlnet-sd21-openpose-diffusers",
-            conditioning_scale=0.711,
-            preprocessor="pose_tensorrt",
-            preprocessor_params={},
-            enabled=True,
-            control_guidance_start=0.0,
-            control_guidance_end=1.0,
-        ),
-        ControlNetConfig(
-            model_id="thibaud/controlnet-sd21-hed-diffusers",
-            conditioning_scale=0.2,
-            preprocessor="soft_edge",
-            preprocessor_params={},
-            enabled=True,
-            control_guidance_start=0.0,
-            control_guidance_end=1.0,
-        ),
-        ControlNetConfig(
-            model_id="thibaud/controlnet-sd21-canny-diffusers",
-            conditioning_scale=0.2,
-            preprocessor="canny",
-            preprocessor_params={
-                "low_threshold": 100,
-                "high_threshold": 200
-            },
-            enabled=True,
-            control_guidance_start=0.0,
-            control_guidance_end=1.0,
-        ),
-        ControlNetConfig(
-            model_id="thibaud/controlnet-sd21-depth-diffusers",
-            conditioning_scale=0.5,
-            preprocessor="depth_tensorrt",
-            preprocessor_params={},
-            enabled=True,
-            control_guidance_start=0.0,
-            control_guidance_end=1.0,
-        ),
-        ControlNetConfig(
-            model_id="thibaud/controlnet-sd21-color-diffusers",
-            conditioning_scale=0.2,
-            preprocessor="passthrough",
-            preprocessor_params={},
-            enabled=True,
-            control_guidance_start=0.0,
-            control_guidance_end=1.0,
-        )
+        # ControlNetConfig(
+        #     model_id="thibaud/controlnet-sd21-openpose-diffusers",
+        #     conditioning_scale=0.711,
+        #     preprocessor="pose_tensorrt",
+        #     preprocessor_params={},
+        #     enabled=True,
+        #     control_guidance_start=0.0,
+        #     control_guidance_end=1.0,
+        # ),
+        # ControlNetConfig(
+        #     model_id="thibaud/controlnet-sd21-hed-diffusers",
+        #     conditioning_scale=0.2,
+        #     preprocessor="soft_edge",
+        #     preprocessor_params={},
+        #     enabled=True,
+        #     control_guidance_start=0.0,
+        #     control_guidance_end=1.0,
+        # ),
+        # ControlNetConfig(
+        #     model_id="thibaud/controlnet-sd21-canny-diffusers",
+        #     conditioning_scale=0.2,
+        #     preprocessor="canny",
+        #     preprocessor_params={
+        #         "low_threshold": 100,
+        #         "high_threshold": 200
+        #     },
+        #     enabled=True,
+        #     control_guidance_start=0.0,
+        #     control_guidance_end=1.0,
+        # ),
+        # ControlNetConfig(
+        #     model_id="thibaud/controlnet-sd21-depth-diffusers",
+        #     conditioning_scale=0.5,
+        #     preprocessor="depth_tensorrt",
+        #     preprocessor_params={},
+        #     enabled=True,
+        #     control_guidance_start=0.0,
+        #     control_guidance_end=1.0,
+        # ),
+        # ControlNetConfig(
+        #     model_id="thibaud/controlnet-sd21-color-diffusers",
+        #     conditioning_scale=0.2,
+        #     preprocessor="passthrough",
+        #     preprocessor_params={},
+        #     enabled=True,
+        #     control_guidance_start=0.0,
+        #     control_guidance_end=1.0,
+        # )
     ]
 
     @model_validator(mode="after")
@@ -177,8 +177,8 @@ def process_tensor_sync(self, img_tensor: torch.Tensor):
         img_tensor = cast(torch.Tensor, self.pipe.stream.image_processor.denormalize(img_tensor))
         img_tensor = self.pipe.preprocess_image(img_tensor)
 
-        # Noop if ControlNets are not enabled
-        self.pipe.update_control_image_efficient(img_tensor)
+        if self.params and self.params.controlnets:
+            self.pipe.update_control_image_efficient(img_tensor)
 
         if self.first_frame:
             self.first_frame = False
diff --git a/runner/app/live/trickle/decoder.py b/runner/app/live/trickle/decoder.py
index d240bdf96..d9b5796c4 100644
--- a/runner/app/live/trickle/decoder.py
+++ b/runner/app/live/trickle/decoder.py
@@ -9,7 +9,7 @@
 
 from .frame import InputFrame
 
-MAX_FRAMERATE=24
+MAX_FRAMERATE=120
 
 def decode_av(pipe_input, frame_callback, put_metadata, target_width, target_height):
     """

From 4812d3f61e46e8cb675c97df209806fca26c2e4e Mon Sep 17 00:00:00 2001
From: Victor Elias <victor@livepeer.org>
Date: Fri, 1 Aug 2025 21:14:28 +0000
Subject: [PATCH 2/7] Move denormalization to outer process (~50FPS)

---
 runner/app/live/pipelines/streamdiffusion.py |  4 ++--
 runner/app/live/streamer/process.py          | 13 +++++++++++--
 2 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/runner/app/live/pipelines/streamdiffusion.py b/runner/app/live/pipelines/streamdiffusion.py
index 195e818e5..d1297133f 100644
--- a/runner/app/live/pipelines/streamdiffusion.py
+++ b/runner/app/live/pipelines/streamdiffusion.py
@@ -173,8 +173,8 @@ def process_tensor_sync(self, img_tensor: torch.Tensor):
 
         # The incoming frame.tensor is (B, H, W, C) in range [-1, 1] while the
         # VaeImageProcessor inside the wrapper expects (B, C, H, W) in [0, 1].
-        img_tensor = img_tensor.permute(0, 3, 1, 2)
-        img_tensor = cast(torch.Tensor, self.pipe.stream.image_processor.denormalize(img_tensor))
+        # img_tensor = img_tensor.permute(0, 3, 1, 2)
+        # img_tensor = cast(torch.Tensor, self.pipe.stream.image_processor.denormalize(img_tensor))
         img_tensor = self.pipe.preprocess_image(img_tensor)
 
         if self.params and self.params.controlnets:
diff --git a/runner/app/live/streamer/process.py b/runner/app/live/streamer/process.py
index 45ad3e4ae..3cc531842 100644
--- a/runner/app/live/streamer/process.py
+++ b/runner/app/live/streamer/process.py
@@ -14,6 +14,8 @@
 from log import config_logging, config_logging_fields, log_timing
 from trickle import InputFrame, AudioFrame, VideoFrame, OutputFrame, VideoOutput, AudioOutput
 
+from streamdiffusion.image_utils import denormalize
+
 class PipelineProcess:
     @staticmethod
     def start(pipeline_name: str, params: dict):
@@ -96,8 +98,15 @@ def reset_stream(self, request_id: str, manifest_id: str, stream_id: str):
     # TODO: Once audio is implemented, combined send_input with input_loop
     # We don't need additional queueing as comfystream already maintains a queue
     def send_input(self, frame: InputFrame):
-        if isinstance(frame, VideoFrame) and not frame.tensor.is_cuda and torch.cuda.is_available():
-            frame = frame.replace_tensor(frame.tensor.cuda())
+        if isinstance(frame, VideoFrame):
+            img_tensor = frame.tensor
+            if not img_tensor.is_cuda and torch.cuda.is_available():
+                img_tensor = img_tensor.cuda()
+            img_tensor = img_tensor.permute(0, 3, 1, 2)
+            img_tensor = denormalize(img_tensor)
+            # img_tensor = self.pipe.preprocess_image(img_tensor)
+            frame = frame.replace_tensor(img_tensor)
+
         self._try_queue_put(self.input_queue, frame)
 
     async def recv_output(self) -> OutputFrame | None:

From 48495ef8069076551b9d482e00c9d3823f4dd535 Mon Sep 17 00:00:00 2001
From: Victor Elias <victor@livepeer.org>
Date: Fri, 1 Aug 2025 21:27:29 +0000
Subject: [PATCH 3/7] Move all preprocessing to outer process (~50FPS)

---
 runner/app/live/pipelines/streamdiffusion.py | 2 +-
 runner/app/live/streamer/process.py          | 8 +++++---
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/runner/app/live/pipelines/streamdiffusion.py b/runner/app/live/pipelines/streamdiffusion.py
index d1297133f..6b66c815a 100644
--- a/runner/app/live/pipelines/streamdiffusion.py
+++ b/runner/app/live/pipelines/streamdiffusion.py
@@ -175,7 +175,7 @@ def process_tensor_sync(self, img_tensor: torch.Tensor):
         # VaeImageProcessor inside the wrapper expects (B, C, H, W) in [0, 1].
         # img_tensor = img_tensor.permute(0, 3, 1, 2)
         # img_tensor = cast(torch.Tensor, self.pipe.stream.image_processor.denormalize(img_tensor))
-        img_tensor = self.pipe.preprocess_image(img_tensor)
+        # img_tensor = self.pipe.preprocess_image(img_tensor)
 
         if self.params and self.params.controlnets:
             self.pipe.update_control_image_efficient(img_tensor)
diff --git a/runner/app/live/streamer/process.py b/runner/app/live/streamer/process.py
index 3cc531842..32bafd5e2 100644
--- a/runner/app/live/streamer/process.py
+++ b/runner/app/live/streamer/process.py
@@ -14,7 +14,7 @@
 from log import config_logging, config_logging_fields, log_timing
 from trickle import InputFrame, AudioFrame, VideoFrame, OutputFrame, VideoOutput, AudioOutput
 
-from streamdiffusion.image_utils import denormalize
+from diffusers.image_processor import VaeImageProcessor
 
 class PipelineProcess:
     @staticmethod
@@ -42,6 +42,8 @@ def __init__(self, pipeline_name: str):
         self.start_time = 0.0
         self.request_id = ""
 
+        self.image_processor = VaeImageProcessor()
+
     def is_alive(self):
         return self.process.is_alive()
 
@@ -103,8 +105,8 @@ def send_input(self, frame: InputFrame):
             if not img_tensor.is_cuda and torch.cuda.is_available():
                 img_tensor = img_tensor.cuda()
             img_tensor = img_tensor.permute(0, 3, 1, 2)
-            img_tensor = denormalize(img_tensor)
-            # img_tensor = self.pipe.preprocess_image(img_tensor)
+            img_tensor = self.image_processor.denormalize(img_tensor)
+            img_tensor = self.image_processor.preprocess(img_tensor)
             frame = frame.replace_tensor(img_tensor)
 
         self._try_queue_put(self.input_queue, frame)

From 0c06d4d2c1145a58a939c586e3cdfa529303302b Mon Sep 17 00:00:00 2001
From: Victor Elias <victor@livepeer.org>
Date: Fri, 1 Aug 2025 21:27:51 +0000
Subject: [PATCH 4/7] Skip normalization back-and-forth (~50FPS)

Gained maybe 0.5 FPS, but the difference is too small to tell for sure.
---
 runner/app/live/streamer/process.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/runner/app/live/streamer/process.py b/runner/app/live/streamer/process.py
index 32bafd5e2..48e5ad84c 100644
--- a/runner/app/live/streamer/process.py
+++ b/runner/app/live/streamer/process.py
@@ -42,7 +42,7 @@ def __init__(self, pipeline_name: str):
         self.start_time = 0.0
         self.request_id = ""
 
-        self.image_processor = VaeImageProcessor()
+        self.image_processor = VaeImageProcessor(do_normalize=False)
 
     def is_alive(self):
         return self.process.is_alive()
@@ -105,7 +105,7 @@ def send_input(self, frame: InputFrame):
             if not img_tensor.is_cuda and torch.cuda.is_available():
                 img_tensor = img_tensor.cuda()
             img_tensor = img_tensor.permute(0, 3, 1, 2)
-            img_tensor = self.image_processor.denormalize(img_tensor)
+            # img_tensor = self.image_processor.denormalize(img_tensor)
             img_tensor = self.image_processor.preprocess(img_tensor)
             frame = frame.replace_tensor(img_tensor)
 

From d9997b863ac73d60a67c21f46d8dc2b5b2e21f26 Mon Sep 17 00:00:00 2001
From: Victor Elias <victor@livepeer.org>
Date: Fri, 1 Aug 2025 21:45:06 +0000
Subject: [PATCH 5/7] Offload preprocessing to CPU (~51FPS)

---
 runner/app/live/streamer/process.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/runner/app/live/streamer/process.py b/runner/app/live/streamer/process.py
index 48e5ad84c..9fd037db6 100644
--- a/runner/app/live/streamer/process.py
+++ b/runner/app/live/streamer/process.py
@@ -102,11 +102,13 @@ def reset_stream(self, request_id: str, manifest_id: str, stream_id: str):
     def send_input(self, frame: InputFrame):
         if isinstance(frame, VideoFrame):
             img_tensor = frame.tensor
-            if not img_tensor.is_cuda and torch.cuda.is_available():
-                img_tensor = img_tensor.cuda()
+            if img_tensor.is_cuda:
+                img_tensor = img_tensor.cpu()
             img_tensor = img_tensor.permute(0, 3, 1, 2)
             # img_tensor = self.image_processor.denormalize(img_tensor)
             img_tensor = self.image_processor.preprocess(img_tensor)
+            if torch.cuda.is_available() and not img_tensor.is_cuda:
+                img_tensor = img_tensor.cuda()
             frame = frame.replace_tensor(img_tensor)
 
         self._try_queue_put(self.input_queue, frame)

From 7baa4e63fe5a3a2d55b4e5da3c8cb9adee02d47b Mon Sep 17 00:00:00 2001
From: Victor Elias <victor@livepeer.org>
Date: Fri, 1 Aug 2025 21:54:20 +0000
Subject: [PATCH 6/7] Revert to 4812d3f

---
 runner/app/live/pipelines/streamdiffusion.py |  2 +-
 runner/app/live/streamer/process.py          | 14 +++++---------
 2 files changed, 6 insertions(+), 10 deletions(-)

diff --git a/runner/app/live/pipelines/streamdiffusion.py b/runner/app/live/pipelines/streamdiffusion.py
index 6b66c815a..d1297133f 100644
--- a/runner/app/live/pipelines/streamdiffusion.py
+++ b/runner/app/live/pipelines/streamdiffusion.py
@@ -175,7 +175,7 @@ def process_tensor_sync(self, img_tensor: torch.Tensor):
         # VaeImageProcessor inside the wrapper expects (B, C, H, W) in [0, 1].
         # img_tensor = img_tensor.permute(0, 3, 1, 2)
         # img_tensor = cast(torch.Tensor, self.pipe.stream.image_processor.denormalize(img_tensor))
-        # img_tensor = self.pipe.preprocess_image(img_tensor)
+        img_tensor = self.pipe.preprocess_image(img_tensor)
 
         if self.params and self.params.controlnets:
             self.pipe.update_control_image_efficient(img_tensor)
diff --git a/runner/app/live/streamer/process.py b/runner/app/live/streamer/process.py
index 9fd037db6..3cc531842 100644
--- a/runner/app/live/streamer/process.py
+++ b/runner/app/live/streamer/process.py
@@ -14,7 +14,7 @@
 from log import config_logging, config_logging_fields, log_timing
 from trickle import InputFrame, AudioFrame, VideoFrame, OutputFrame, VideoOutput, AudioOutput
 
-from diffusers.image_processor import VaeImageProcessor
+from streamdiffusion.image_utils import denormalize
 
 class PipelineProcess:
     @staticmethod
@@ -42,8 +42,6 @@ def __init__(self, pipeline_name: str):
         self.start_time = 0.0
         self.request_id = ""
 
-        self.image_processor = VaeImageProcessor(do_normalize=False)
-
     def is_alive(self):
         return self.process.is_alive()
 
@@ -102,13 +100,11 @@ def reset_stream(self, request_id: str, manifest_id: str, stream_id: str):
     def send_input(self, frame: InputFrame):
         if isinstance(frame, VideoFrame):
             img_tensor = frame.tensor
-            if img_tensor.is_cuda:
-                img_tensor = img_tensor.cpu()
-            img_tensor = img_tensor.permute(0, 3, 1, 2)
-            # img_tensor = self.image_processor.denormalize(img_tensor)
-            img_tensor = self.image_processor.preprocess(img_tensor)
-            if torch.cuda.is_available() and not img_tensor.is_cuda:
+            if not img_tensor.is_cuda and torch.cuda.is_available():
                 img_tensor = img_tensor.cuda()
+            img_tensor = img_tensor.permute(0, 3, 1, 2)
+            img_tensor = denormalize(img_tensor)
+            # img_tensor = self.pipe.preprocess_image(img_tensor)
             frame = frame.replace_tensor(img_tensor)
 
         self._try_queue_put(self.input_queue, frame)

From ed73ecb85522565be140b36fb6759455da3bcb7b Mon Sep 17 00:00:00 2001
From: Victor Elias <victor@livepeer.org>
Date: Mon, 4 Aug 2025 15:23:51 +0000
Subject: [PATCH 7/7] WIP: SDXL

---
 runner/app/live/pipelines/streamdiffusion.py           |  9 +++++----
 runner/app/live/streamer/process.py                    | 10 +++++++---
 .../tools/streamdiffusion/build_tensorrt_internal.sh   |  2 +-
 runner/dl_checkpoints.sh                               |  7 ++++---
 runner/docker/Dockerfile.live-base-streamdiffusion     |  4 ++--
 5 files changed, 19 insertions(+), 13 deletions(-)

diff --git a/runner/app/live/pipelines/streamdiffusion.py b/runner/app/live/pipelines/streamdiffusion.py
index d1297133f..b4dfe7077 100644
--- a/runner/app/live/pipelines/streamdiffusion.py
+++ b/runner/app/live/pipelines/streamdiffusion.py
@@ -6,13 +6,13 @@
 import torch
 from pydantic import BaseModel, Field, model_validator
 from streamdiffusion import StreamDiffusionWrapper
-from streamdiffusion.controlnet.preprocessors import list_preprocessors
+# from streamdiffusion.controlnet.preprocessors import list_preprocessors
 
 from .interface import Pipeline
 from trickle import VideoFrame, VideoOutput
 from trickle import DEFAULT_WIDTH, DEFAULT_HEIGHT
 
-AVAILABLE_PREPROCESSORS = list_preprocessors()
+AVAILABLE_PREPROCESSORS = []
 
 class ControlNetConfig(BaseModel):
     """ControlNet configuration model"""
@@ -39,7 +39,8 @@ class Config:
     model_id: Literal[
         "stabilityai/sd-turbo",
         "KBlueLeaf/kohaku-v2.1",
-    ] = "stabilityai/sd-turbo"
+        "stabilityai/sdxl-turbo",
+    ] = "stabilityai/sdxl-turbo"
 
     # Generation parameters
     prompt: str | List[Tuple[str, float]] = "an anime render of a girl with purple hair, masterpiece"
@@ -175,7 +176,7 @@ def process_tensor_sync(self, img_tensor: torch.Tensor):
         # VaeImageProcessor inside the wrapper expects (B, C, H, W) in [0, 1].
         # img_tensor = img_tensor.permute(0, 3, 1, 2)
         # img_tensor = cast(torch.Tensor, self.pipe.stream.image_processor.denormalize(img_tensor))
-        img_tensor = self.pipe.preprocess_image(img_tensor)
+        # img_tensor = self.pipe.preprocess_image(img_tensor)
 
         if self.params and self.params.controlnets:
             self.pipe.update_control_image_efficient(img_tensor)
diff --git a/runner/app/live/streamer/process.py b/runner/app/live/streamer/process.py
index 3cc531842..0c1ca467e 100644
--- a/runner/app/live/streamer/process.py
+++ b/runner/app/live/streamer/process.py
@@ -14,7 +14,7 @@
 from log import config_logging, config_logging_fields, log_timing
 from trickle import InputFrame, AudioFrame, VideoFrame, OutputFrame, VideoOutput, AudioOutput
 
-from streamdiffusion.image_utils import denormalize
+from diffusers.image_processor import VaeImageProcessor
 
 class PipelineProcess:
     @staticmethod
@@ -42,6 +42,8 @@ def __init__(self, pipeline_name: str):
         self.start_time = 0.0
         self.request_id = ""
 
+        self.image_processor = VaeImageProcessor()
+
     def is_alive(self):
         return self.process.is_alive()
 
@@ -103,8 +105,8 @@ def send_input(self, frame: InputFrame):
             if not img_tensor.is_cuda and torch.cuda.is_available():
                 img_tensor = img_tensor.cuda()
             img_tensor = img_tensor.permute(0, 3, 1, 2)
-            img_tensor = denormalize(img_tensor)
-            # img_tensor = self.pipe.preprocess_image(img_tensor)
+            img_tensor = self.image_processor.denormalize(img_tensor)
+            # img_tensor = self.image_processor.preprocess(img_tensor)
             frame = frame.replace_tensor(img_tensor)
 
         self._try_queue_put(self.input_queue, frame)
@@ -179,6 +181,7 @@ async def _initialize_pipeline(self):
                 return pipeline
         except Exception as e:
             self._report_error(f"Error loading pipeline: {e}")
+            logging.exception(e)
             if not params:
                 # Already tried loading with default params
                 raise
@@ -191,6 +194,7 @@ async def _initialize_pipeline(self):
                     return pipeline
             except Exception as e:
                 self._report_error(f"Error loading pipeline with default params: {e}")
+                logging.exception(e)
                 raise
 
     async def _run_pipeline_loops(self):
diff --git a/runner/app/tools/streamdiffusion/build_tensorrt_internal.sh b/runner/app/tools/streamdiffusion/build_tensorrt_internal.sh
index 5773a979b..a0fc75372 100755
--- a/runner/app/tools/streamdiffusion/build_tensorrt_internal.sh
+++ b/runner/app/tools/streamdiffusion/build_tensorrt_internal.sh
@@ -10,7 +10,7 @@ set -e
 CONDA_PYTHON="/workspace/miniconda3/envs/comfystream/bin/python"
 MODELS="stabilityai/sd-turbo KBlueLeaf/kohaku-v2.1"
 TIMESTEPS="3 4" # This is basically the supported sizes for the t_index_list
-DIMENSIONS="512x512" # Engines are now compiled for the 384-1024 range, but keep this in case it's useful in the future
+DIMENSIONS="1024x1024" # Engines are now compiled for the 384-1024 range, but keep this in case it's useful in the future
 CONTROLNETS="" # Default empty, will be set from command line
 
 # Function to display help
diff --git a/runner/dl_checkpoints.sh b/runner/dl_checkpoints.sh
index b6eaec439..c421cac65 100755
--- a/runner/dl_checkpoints.sh
+++ b/runner/dl_checkpoints.sh
@@ -131,6 +131,7 @@ function download_streamdiffusion_live_models() {
   # StreamDiffusion
   huggingface-cli download KBlueLeaf/kohaku-v2.1 --include "*.safetensors" "*.json" "*.txt" --exclude ".onnx" ".onnx_data" --cache-dir models
   huggingface-cli download stabilityai/sd-turbo --include "*.safetensors" "*.json" "*.txt" --exclude ".onnx" ".onnx_data" --cache-dir models
+  huggingface-cli download stabilityai/sdxl-turbo --include "*.safetensors" "*.json" "*.txt" --exclude ".onnx" ".onnx_data" --cache-dir models
 
   # ControlNet models
   huggingface-cli download thibaud/controlnet-sd21-openpose-diffusers --include "*.bin" "*.json" "*.txt" --exclude ".onnx" ".onnx_data" --cache-dir models
@@ -213,9 +214,9 @@ function build_streamdiffusion_tensorrt() {
 
   docker run --rm -v ./models:/models --gpus all -l TensorRT-engines $AI_RUNNER_STREAMDIFFUSION_IMAGE \
     bash -c "./app/tools/streamdiffusion/build_tensorrt_internal.sh \
-              --models 'stabilityai/sd-turbo KBlueLeaf/kohaku-v2.1' \
-              --timesteps '1 2 3 4' \
-              --controlnets 'thibaud/controlnet-sd21-openpose-diffusers thibaud/controlnet-sd21-hed-diffusers thibaud/controlnet-sd21-canny-diffusers thibaud/controlnet-sd21-depth-diffusers thibaud/controlnet-sd21-color-diffusers' \
+              --models 'stabilityai/sdxl-turbo' \
+              --timesteps '1 2 3' \
+              --controlnets '' \
               --build-depth-anything \
               --build-pose \
               && \
diff --git a/runner/docker/Dockerfile.live-base-streamdiffusion b/runner/docker/Dockerfile.live-base-streamdiffusion
index 83cfc86e4..619e2e7f3 100644
--- a/runner/docker/Dockerfile.live-base-streamdiffusion
+++ b/runner/docker/Dockerfile.live-base-streamdiffusion
@@ -29,8 +29,8 @@ RUN conda run -n comfystream pip install --no-cache-dir --force-reinstall \
     conda run -n comfystream pip install --no-cache-dir \
     xformers==0.0.30 --no-deps
 
-# Install StreamDiffusion @ v0.0.1-cnet.4 into the comfystream environment
-RUN conda run -n comfystream pip install git+https://github.com/livepeer/StreamDiffusion.git@v0.0.1-cnet.4#egg=streamdiffusion[tensorrt]
+# Install StreamDiffusion @ 902036d into the comfystream environment
+RUN conda run -n comfystream pip install git+https://github.com/livepeer/StreamDiffusion.git@902036df74ad4a63b2d179a0e6dcc955e6f54c98#egg=streamdiffusion[tensorrt]
 
 # Pin versions of ONNX runtime which are too loose on streamdiffusion setup.py
 RUN conda run -n comfystream pip install --no-cache-dir \