Speed up generation a tiny bit.

comfyanonymous · comfyanonymous · commit a9a6923922cd · 2024-06-18T10:22:53.000-04:00
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,7 +1,7 @@
 [project]
 name = "comfyui_tensorrt"
 description = "TensorRT Node for ComfyUI\nThis node enables the best performance on NVIDIA RTX™ Graphics Cards  (GPUs) for Stable Diffusion by leveraging NVIDIA TensorRT."
-version = "0.1.2"
+version = "0.1.3"
 license = "LICENSE"
 dependencies = [
   "tensorrt>=10.0.1",
diff --git a/tensorrt_loader.py b/tensorrt_loader.py
@@ -96,7 +96,7 @@ def __call__(self, x, timesteps, context, y=None, control=None, transformer_opti
                 x = model_inputs_converted[k]
                 self.context.set_tensor_address(k, x[(x.shape[0] // curr_split_batch) * i:].data_ptr())
             self.context.execute_async_v3(stream_handle=stream.cuda_stream)
-        stream.synchronize()
+        # stream.synchronize()  # Not needed: this stream is the default torch stream.
         return out
 
     def load_state_dict(self, sd, strict=False):