9 | 9 | from tqdm import tqdm
10 | 10 |
11 | 11 | # TODO:
12 | | -# Deal with xformers if it's enabled
13 | 12 | # Make it more generic: less model specific code
14 | 13 |
15 | 14 | # add output directory to tensorrt search path
@@ -163,11 +162,15 @@ def _convert(
163 | 162 | context_len = 77
164 | 163 | context_len_min = context_len
165 | 164 |
166 | | - if context_dim is None: #SD3
| 165 | + if isinstance(model.model, comfy.model_base.SD3): #SD3
167 | 166 | context_embedder_config = model.model.model_config.unet_config.get("context_embedder_config", None)
168 | 167 | if context_embedder_config is not None:
169 | 168 | context_dim = context_embedder_config.get("params", {}).get("in_features", None)
170 | 169 | context_len = 154 #NOTE: SD3 can have 77 or 154 depending on which text encoders are used, this is why context_len_min stays 77
| 170 | + elif isinstance(model.model, comfy.model_base.AuraFlow):
| 171 | + context_dim = 2048
| 172 | + context_len_min = 256
| 173 | + context_len = 256
171 | 174 |
172 | 175 | if context_dim is not None:
173 | 176 | input_names = ["x", "timesteps", "context"]
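
For reference, the per-model branch above amounts to a small isinstance dispatch over the wrapped ComfyUI model classes. A minimal standalone sketch of the same logic; the function name get_context_params and passing the initial context_dim in as an argument are illustrative, not part of the patch:

import comfy.model_base

def get_context_params(model, context_dim):
    # Defaults for SD1.x/SD2.x/SDXL-style models: CLIP context of length 77.
    context_len_min = context_len = 77
    if isinstance(model.model, comfy.model_base.SD3):
        # SD3 keeps the context width in its context_embedder config; the sequence
        # length can be 77 or 154 depending on which text encoders are in use,
        # which is why context_len_min stays at 77.
        embedder = model.model.model_config.unet_config.get("context_embedder_config", None)
        if embedder is not None:
            context_dim = embedder.get("params", {}).get("in_features", None)
            context_len = 154
    elif isinstance(model.model, comfy.model_base.AuraFlow):
        # AuraFlow: fixed 2048-wide context, 256 tokens for both min and max.
        context_dim = 2048
        context_len_min = context_len = 256
    return context_dim, context_len_min, context_len
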
@@ -207,19 +210,22 @@ def forward(self, x, timesteps, context, y):
207 | 210 | else:
208 | 211 | class UNET(torch.nn.Module):
209 | 212 | def forward(self, x, timesteps, context, y=None):
210 | | - return self.unet(
211 | | - x,
212 | | - timesteps,
213 | | - context,
214 | | - y,
215 | | - transformer_options=self.transformer_options,
216 | | - )
| 213 | + if y is None:
| 214 | + return self.unet(x, timesteps, context, transformer_options=self.transformer_options)
| 215 | + else:
| 216 | + return self.unet(
| 217 | + x,
| 218 | + timesteps,
| 219 | + context,
| 220 | + y,
| 221 | + transformer_options=self.transformer_options,
| 222 | + )
217 | 223 | _unet = UNET()
218 | 224 | _unet.unet = unet
219 | 225 | _unet.transformer_options = transformer_options
220 | 226 | unet = _unet
221 | 227 |
222 | | - input_channels = model.model.model_config.unet_config.get("in_channels")
| 228 | + input_channels = model.model.model_config.unet_config.get("in_channels", 4)
223 | 229 |
224 | 230 | inputs_shapes_min = (
225 | 231 | (batch_size_min, input_channels, height_min // 8, width_min // 8),
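
The rewritten forward in the else branch only passes y through when it was actually given, presumably so that models whose forward() takes no y input (such as the flow-style architectures handled above) can be wrapped and exported with the same class. A self-contained sketch of the pattern; the UNET class mirrors the patch, while DummyUNet and the tensor shapes are illustrative stand-ins:

import torch

class UNET(torch.nn.Module):
    # Mirrors the wrapper in the patch: only forward `y` when it was actually given.
    def forward(self, x, timesteps, context, y=None):
        if y is None:
            return self.unet(x, timesteps, context,
                             transformer_options=self.transformer_options)
        return self.unet(x, timesteps, context, y,
                         transformer_options=self.transformer_options)

# Hypothetical stand-in for the real diffusion model; its forward has no `y` parameter.
class DummyUNet(torch.nn.Module):
    def forward(self, x, timesteps, context, transformer_options=None):
        return x

wrapper = UNET()
wrapper.unet = DummyUNet()          # attributes attached after construction, as in the patch
wrapper.transformer_options = {}

x = torch.randn(1, 4, 64, 64)
out = wrapper(x, torch.zeros(1), torch.randn(1, 256, 2048))  # y omitted, no TypeError
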
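The inputs_shapes_min tuple at the end of the last hunk (together with matching opt/max tuples built the same way) is the kind of data that feeds a TensorRT optimization profile for dynamic shapes; the // 8 reflects that the model runs on latents at one eighth of the pixel resolution. A hedged sketch of how such shape triples map onto the TensorRT Python API; build_profile and the example values are illustrative, not taken from this file:

import tensorrt as trt

def build_profile(builder, config, input_names, shapes_min, shapes_opt, shapes_max):
    # One optimization profile covering the allowed dynamic range of every input.
    profile = builder.create_optimization_profile()
    for name, s_min, s_opt, s_max in zip(input_names, shapes_min, shapes_opt, shapes_max):
        profile.set_shape(name, s_min, s_opt, s_max)
    config.add_optimization_profile(profile)

logger = trt.Logger(trt.Logger.WARNING)
builder = trt.Builder(logger)
config = builder.create_builder_config()
# Latent-space "x" input only, as an example: (batch, in_channels, H // 8, W // 8)
build_profile(builder, config, ["x"],
              [(1, 4, 512 // 8, 512 // 8)],
              [(1, 4, 1024 // 8, 1024 // 8)],
              [(4, 4, 1536 // 8, 1536 // 8)])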