diff --git a/docs/source/conf.py b/docs/source/conf.py
index 3c14ef2..5e32746 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -14,7 +14,7 @@
 # -- General configuration ---------------------------------------------------
 # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
 
-extensions = ["sphinx.ext.autodoc", "sphinx.ext.napoleon", "sphinx_tabs.tabs"]
+extensions = ["sphinx.ext.autodoc", "sphinx.ext.napoleon", "sphinx_tabs.tabs", "sphinx_click"]
 
 templates_path = ['_templates']
 exclude_patterns = []
@@ -25,6 +25,10 @@
 html_theme = 'shibuya'
 html_static_path = ['_static']
 
+autodoc_mock_imports = [
+    "verl",
+    "deepspeed"
+]
 
 import os
 import sys
diff --git a/docs/source/guides/add_modality.rst b/docs/source/guides/add_modality.rst
index e4a6f48..b9d1925 100644
--- a/docs/source/guides/add_modality.rst
+++ b/docs/source/guides/add_modality.rst
@@ -170,7 +170,7 @@ Lastly, we implement the modality model. This is the model that performs the for
 
 A modality class must inherit :class:`~multimeditron.model.modalities.base.BaseModality` is typically created with 2 main modules:
 
 1. A pretrained modality embedder (like a CLIP model): This module produces meaningful embeddings for given modalities
-2. A tunable projection module (usually a simple MLP or a linear layer): This module map embeddings from the modality embedder to the LLM embedding space. The dimension of this embedding space is given by the `hidden_size` attribute of :func:`~multimeditron.model.modalities.base.BaseModalityConfig`
+2. A tunable projection module (usually a simple MLP or a linear layer): This module maps embeddings from the modality embedder to the LLM embedding space. The dimension of this embedding space is given by the `hidden_size` attribute of :class:`~multimeditron.model.modalities.base.BaseModalityConfig`
 
 .. code-block:: python
diff --git a/pyproject.toml b/pyproject.toml
index 1a53c98..7c34a0a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -40,9 +40,12 @@ flash-attn = [
     "flash-attn",
 ]
 docs = [
-    "sphinx",
-    "sphinx-tabs",
     "shibuya",
+    "sphinx<9",
+    "sphinx-tabs",
+    "sphinx-click",
+    "torch",
+    "torchvision",
 ]
 
 [project.scripts]
diff --git a/src/multimeditron/model/modalities/base.py b/src/multimeditron/model/modalities/base.py
index 04301a7..499a794 100644
--- a/src/multimeditron/model/modalities/base.py
+++ b/src/multimeditron/model/modalities/base.py
@@ -5,6 +5,8 @@
 import torch
 from transformers import AutoModel, AutoConfig, AutoProcessor, PretrainedConfig, PreTrainedModel
 
+__all__ = []
+
 class BaseModalityConfig(PretrainedConfig):
     """
     Configuration class for defining modality parameters.
diff --git a/src/multimeditron/model/modalities/image_modality.py b/src/multimeditron/model/modalities/image_modality.py
index 371bfc6..806a9c4 100644
--- a/src/multimeditron/model/modalities/image_modality.py
+++ b/src/multimeditron/model/modalities/image_modality.py
@@ -1,5 +1,5 @@
 from multimeditron.model.constants import NUM_EMBEDDINGS_KEY, MODALITY_VALUE_KEY, POSITION_IDS_KEY
-from multimeditron.model.modalities.base import BaseModality, BaseModalityConfig, AutoModality, BaseModalityProcessor
+from multimeditron.model.modalities import BaseModality, BaseModalityConfig, AutoModality, BaseModalityProcessor
 from multimeditron.model.projectors.mlp import MLPProjector
 import torch
 from transformers import AutoImageProcessor, AutoModel, AutoConfig
diff --git a/src/multimeditron/model/modalities/image_modality_moe.py b/src/multimeditron/model/modalities/image_modality_moe.py
index ed7eb97..183d495 100644
--- a/src/multimeditron/model/modalities/image_modality_moe.py
+++ b/src/multimeditron/model/modalities/image_modality_moe.py
@@ -1,6 +1,6 @@
 import torch
 from multimeditron.model.constants import NUM_EMBEDDINGS_KEY, MODALITY_VALUE_KEY
-from multimeditron.model.modalities.base import AutoModality, BaseModality, BaseModalityConfig, BaseModalityProcessor
+from multimeditron.model.modalities import AutoModality, BaseModality, BaseModalityConfig, BaseModalityProcessor
 from multimeditron.model.modalities.moe.gating import GatingNetwork
 from multimeditron.model.projectors.mlp import MLPProjector
 from multimeditron.model.attention import CrossAttention
diff --git a/src/multimeditron/model/modalities/image_modality_moe_pep.py b/src/multimeditron/model/modalities/image_modality_moe_pep.py
index 7d39ffe..f38d21d 100644
--- a/src/multimeditron/model/modalities/image_modality_moe_pep.py
+++ b/src/multimeditron/model/modalities/image_modality_moe_pep.py
@@ -1,6 +1,5 @@
-import uuid
 from multimeditron.model.constants import NUM_EMBEDDINGS_KEY, MODALITY_VALUE_KEY
-from multimeditron.model.modalities.base import AutoModality, BaseModality, BaseModalityConfig, BaseModalityProcessor
+from multimeditron.model.modalities import AutoModality, BaseModality, BaseModalityConfig, BaseModalityProcessor
 from multimeditron.model.modalities.moe.gating import GatingNetwork
 from multimeditron.model.projectors.mlp import MLPProjector
 from multimeditron.model.attention import CrossAttention