diff --git a/trellis2/modules/image_feature_extractor.py b/trellis2/modules/image_feature_extractor.py index c3cb515a..564eaf35 100644 --- a/trellis2/modules/image_feature_extractor.py +++ b/trellis2/modules/image_feature_extractor.py @@ -83,7 +83,11 @@ def extract_features(self, image: torch.Tensor) -> torch.Tensor: hidden_states = self.model.embeddings(image, bool_masked_pos=None) position_embeddings = self.model.rope_embeddings(image) - for i, layer_module in enumerate(self.model.layer): + # transformers >=5 wraps the DINOv3 encoder under .model (so .layer is at + # self.model.model.layer); older versions exposed .layer directly on the + # top-level model. Pick whichever exists. + encoder = getattr(self.model, "model", self.model) + for i, layer_module in enumerate(encoder.layer): hidden_states = layer_module( hidden_states, position_embeddings=position_embeddings,