From 0bd58ad4e3038eb14792472f86be4d27f3f16e1a Mon Sep 17 00:00:00 2001
From: Eyal Mizrachi
Date: Tue, 28 Apr 2026 13:29:20 -0400
Subject: [PATCH] Fix DINOv3 layer access for transformers >= 5.0

In recent transformers releases, `DINOv3ViTModel` no longer exposes the
encoder layer ModuleList directly on the top-level model. The encoder is
wrapped under `.model`, so the layers now live at `self.model.model.layer`
(the inner `model` is `DINOv3ViTEncoder`). Older transformers versions
exposed `self.model.layer` directly. Probe for both via
`getattr(self.model, "model", self.model)` so the change is backward
compatible.

Repro before the fix (transformers 5.6.2 on PyPI):

    AttributeError: 'DINOv3ViTModel' object has no attribute 'layer'

raised at trellis2/modules/image_feature_extractor.py:86 during
`Trellis2ImageTo3DPipeline.run()` when extracting DINOv3 features from
the input image.

Co-Authored-By: Claude Opus 4.7 (1M context)
---
 trellis2/modules/image_feature_extractor.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/trellis2/modules/image_feature_extractor.py b/trellis2/modules/image_feature_extractor.py
index c3cb515a..564eaf35 100644
--- a/trellis2/modules/image_feature_extractor.py
+++ b/trellis2/modules/image_feature_extractor.py
@@ -83,7 +83,11 @@ def extract_features(self, image: torch.Tensor) -> torch.Tensor:
         hidden_states = self.model.embeddings(image, bool_masked_pos=None)
         position_embeddings = self.model.rope_embeddings(image)
 
-        for i, layer_module in enumerate(self.model.layer):
+        # transformers >=5 wraps the DINOv3 encoder under .model (so .layer is at
+        # self.model.model.layer); older versions exposed .layer directly on the
+        # top-level model. Pick whichever exists.
+        encoder = getattr(self.model, "model", self.model)
+        for i, layer_module in enumerate(encoder.layer):
             hidden_states = layer_module(
                 hidden_states,
                 position_embeddings=position_embeddings,
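
For reviewers, a minimal structural check of the attribute probe that runs
without downloading weights (a sketch, assuming `DINOv3ViTConfig` and
`DINOv3ViTModel` are importable from the installed transformers; the default
config here is only illustrative):

    from transformers import DINOv3ViTConfig, DINOv3ViTModel

    # Randomly initialized model; enough to inspect the module layout.
    model = DINOv3ViTModel(DINOv3ViTConfig())

    # Same probe as the patch: transformers >=5 nests the encoder under
    # .model; on older versions getattr falls back to the model itself.
    encoder = getattr(model, "model", model)
    print(type(encoder).__name__, hasattr(encoder, "layer"))

On an affected install this should print the inner encoder class with
`layer` present; on pre-5.0 installs it prints `DINOv3ViTModel` with the
same result, which is why the one-line probe covers both layouts.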