2 changes: 1 addition & 1 deletion .gitignore
@@ -175,7 +175,7 @@ examples/basic_usage/*.xyz
extensions/

# sphinx gallery
docs/src/examples
docs/src/generated_examples/
*execution_times*

# JavaScript
4 changes: 1 addition & 3 deletions .readthedocs.yml
@@ -13,9 +13,7 @@ build:
rust: "1.75"
jobs:
pre_build:
- set -e && cd examples/ase && bash train.sh
- set -e && cd examples/llpr && bash train.sh
- set -e && cd examples/zbl && bash train.sh
- set -e && for f in $(find examples -name '*.sh'); do cd $(dirname $f); bash $(basename $f); cd -; done

# Build documentation in the docs/ directory with Sphinx
sphinx:
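As an editorial aside (not part of the PR), the new one-liner above simply discovers every ``*.sh`` script under ``examples/`` and runs each one from its own directory. A minimal Python sketch of the same behaviour, useful for debugging the example scripts locally, might look like this; the sorted traversal order and ``check=True`` error handling are assumptions, not something the CI line guarantees:

.. code-block:: python

    # Sketch only: mirrors the shell loop in .readthedocs.yml, assuming every
    # script is meant to be executed with bash from the directory containing it.
    import subprocess
    from pathlib import Path

    for script in sorted(Path("examples").rglob("*.sh")):
        # cwd= replaces the `cd $(dirname $f) ... cd -` dance of the shell loop
        subprocess.run(["bash", script.name], cwd=script.parent, check=True)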
31 changes: 5 additions & 26 deletions docs/generate_examples/conf.py
@@ -15,34 +15,13 @@
sphinx_gallery_conf = {
"filename_pattern": r"/*\.py",
"copyfile_regex": r".*\.(pt|sh|xyz|yaml)",
"ignore_pattern": r"train\.sh",
"example_extensions": {".py", ".sh"},
"default_thumb_file": os.path.join(ROOT, "docs/src/logo/metatrain-512.png"),
"examples_dirs": [
os.path.join(ROOT, "examples", "ase"),
os.path.join(ROOT, "examples", "llpr"),
os.path.join(ROOT, "examples", "zbl"),
os.path.join(ROOT, "examples", "programmatic", "use_architectures_outside"),
os.path.join(ROOT, "examples", "programmatic", "data_preparation"),
os.path.join(ROOT, "examples", "programmatic", "flashmd"),
os.path.join(ROOT, "examples", "basic_usage"),
os.path.join(ROOT, "examples", "train_from_scratch"),
os.path.join(ROOT, "examples", "validation"),
os.path.join(ROOT, "examples", "multi-gpu"),
],
"gallery_dirs": [
os.path.join(ROOT, "docs", "src", "examples", "ase"),
os.path.join(ROOT, "docs", "src", "examples", "llpr"),
os.path.join(ROOT, "docs", "src", "examples", "zbl"),
os.path.join(ROOT, "docs", "src", "examples", "programmatic", "use_architectures_outside"),
os.path.join(ROOT, "docs", "src", "examples", "programmatic", "data_preparation"),
os.path.join(ROOT, "docs", "src", "examples", "programmatic", "flashmd"),
os.path.join(ROOT, "docs", "src", "examples", "basic_usage"),
os.path.join(ROOT, "docs", "src", "examples", "train_from_scratch"),
os.path.join(ROOT, "docs", "src", "examples", "validation"),
os.path.join(ROOT, "docs", "src", "examples", "multi-gpu"),
],
"examples_dirs": "../../examples",
"gallery_dirs": "generated_examples",
"min_reported_time": 5,
"matplotlib_animations": True,
"image_scrapers": ( ChemiscopeScraper(), ),
"image_scrapers": ["matplotlib", ChemiscopeScraper()],
"remove_config_comments": True,
"within_subsection_order": "FileNameSortKey",
}
3 changes: 1 addition & 2 deletions docs/src/architectures/nanopet.rst
@@ -59,8 +59,7 @@ hyperparameters to tune are (in decreasing order of importance):
neural network. Depending on the dataset, increasing this hyperparameter might lead to
better accuracy, at the cost of increased training and evaluation time.
- ``loss``: This section describes the loss function to be used. See the
:doc:`dedicated documentation page <../advanced-concepts/loss-functions>` for more
details.
:ref:`loss-functions` for more details.
- ``long_range``: In some systems and datasets, enabling long-range Coulomb interactions
might be beneficial for the accuracy of the model and/or its physical correctness.
See below for a breakdown of the long-range section of the model hyperparameters.
3 changes: 1 addition & 2 deletions docs/src/architectures/pet.rst
@@ -62,8 +62,7 @@ hyperparameters to tune are (in decreasing order of importance):
neural network. Depending on the dataset, increasing this hyperparameter might lead to
better accuracy, at the cost of increased training and evaluation time.
- ``loss``: This section describes the loss function to be used. See the
:doc:`dedicated documentation page <../advanced-concepts/loss-functions>` for more
details.
:ref:`loss-functions` for more details.
- ``long_range``: In some systems and datasets, enabling long-range Coulomb interactions
might be beneficial for the accuracy of the model and/or its physical correctness.
See below for a breakdown of the long-range section of the model hyperparameters.
3 changes: 1 addition & 2 deletions docs/src/architectures/soap-bpnn.rst
@@ -57,8 +57,7 @@ We explain below the model-specific hypers for SOAP-BPNN.
this hyperparameter to ``false`` will lead to slower convergence of training, but
might lead to better generalization outside of the training set distribution.
- ``loss``: This section describes the loss function to be used. See the
:doc:`dedicated documentation page <../advanced-concepts/loss-functions>` for more
details.
:ref:`loss-functions` for more details.

In addition to these model-specific hypers, we re-highlight that the following additive
models (``zbl`` and ``long_range``) may be needed to achieve better description at the
File renamed without changes.
4 changes: 2 additions & 2 deletions docs/src/faq.rst
@@ -39,7 +39,7 @@ If available you can also try to run on a GPU, which significantly increases per
Looking at a distribution of your energies per atom can help. Furthermore, outliers such as large forces
complicate training, so looking at the distribution of the forces and removing structures with large forces
(e.g. all structures with an absolute force component > 20 eV/Å) from the dataset can help stabilize training. For these tasks, parity plots are useful for finding outliers.
See our :ref:`sphx_glr_examples_validation_parity_plot.py` for how to create them.
See our :ref:`sphx_glr_generated_examples_0-beginner_04-parity_plot.py` for how to create them.
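As a hedged illustration (not part of the PR), the outlier filtering described above could be scripted with ASE roughly as follows; the file name ``dataset.xyz``, the ``forces`` key, and the 20 eV/Å threshold are placeholders to adapt to your own data:

.. code-block:: python

    # Sketch: drop structures whose largest absolute force component exceeds
    # a threshold before training. Forces may live on a calculator (standard
    # extended XYZ) or in the per-atom arrays, depending on how the file was written.
    import ase.io
    import numpy as np

    def max_abs_force(frame):
        if frame.calc is not None and "forces" in frame.calc.results:
            return np.abs(frame.calc.results["forces"]).max()
        return np.abs(frame.arrays["forces"]).max()

    frames = ase.io.read("dataset.xyz", index=":")
    threshold = 20.0  # eV/Å
    kept = [frame for frame in frames if max_abs_force(frame) <= threshold]
    print(f"kept {len(kept)} of {len(frames)} structures")
    ase.io.write("dataset_filtered.xyz", kept)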

General training concepts
-------------------------
@@ -63,7 +63,7 @@ correlations up to roughly 10 Å.
**Q: In what format should I provide my data?**

**A:** You can find everything on how to prepare your data in
:ref:`sphx_glr_examples_programmatic_data_preparation_data_preparation.py`.
:ref:`sphx_glr_generated_examples_0-beginner_01-data_preparation.py`.

**Q: How small should my errors be before I can use my model to run Molecular Dynamics simulations?**

1 change: 0 additions & 1 deletion docs/src/getting-started/index.rst
@@ -7,7 +7,6 @@ This section describes how to install the package and its most basic commands.
:maxdepth: 1

quickstart
../examples/basic_usage/usage
custom_dataset_conf
advanced_base_config
override
Reviewer comment (Contributor): The fine-tuning example here should be deleted from this section. It also looks outdated to me and could probably be removed entirely; @cesaremalosso, can you comment on whether this file can be deleted completely? I think it has been replaced by an example in the beginners section.

Reviewer comment (Contributor): Hmm, but the fine-tuning page in the beginner section links to this. The reason I thought it is outdated is that the file refers to a 'fine-tuning example' in the advanced concepts that does not exist anymore. Can someone from the fine-tuning committee clean this up? Also, a unified spelling of 'fine-tuning' would be good if this is kept.

2 changes: 1 addition & 1 deletion docs/src/getting-started/override.rst
@@ -4,7 +4,7 @@ Override Architecture's Default Parameters
In our initial tutorial, we used default parameters to train a model employing the
SOAP-BPNN architecture, as shown in the following config:

.. literalinclude:: ../../../examples/basic_usage/options.yaml
.. literalinclude:: ../../../examples/0-beginner/options-basic.yaml
:language: yaml

While default parameters often serve as a good starting point, depending on your
4 changes: 2 additions & 2 deletions docs/src/index.rst
@@ -19,8 +19,8 @@
getting-started/index
configuration/index
architectures/index
tutorials/index
advanced-concepts/index
generated_examples/index
concepts/index
faq
cite
dev-docs/index
17 changes: 0 additions & 17 deletions docs/src/tutorials/advanced_tutorials/index.rst

This file was deleted.

16 changes: 0 additions & 16 deletions docs/src/tutorials/beginner_tutorials/index.rst

This file was deleted.

14 changes: 0 additions & 14 deletions docs/src/tutorials/index.rst

This file was deleted.

@@ -1,3 +1,5 @@
data.xyz
output.xyz
qm9_reduced_100.zip
qm9_reduced_100_all_at_once.zip
carbon_reduced_100_memmap/*
13 changes: 6 additions & 7 deletions examples/basic_usage/usage.sh → examples/0-beginner/00-basic-usage.sh
100755 → 100644
@@ -14,9 +14,8 @@ mtt --help
#
# We now demonstrate how to ``train`` and ``evaluate`` a model from the command line.
# For this example we use the :ref:`architecture-soap-bpnn` architecture and a subset of
# the `QM9 dataset <https://paperswithcode.com/dataset/qm9>`_. You can obtain the
# dataset for this example here: :download:`qm9_reduced_100.xyz
# <../../../../examples/basic_usage/qm9_reduced_100.xyz>`.
# the `QM9 dataset <https://www.nature.com/articles/sdata201422>`_. You can obtain the
# dataset for this example here: :download:`qm9_reduced_100.xyz <qm9_reduced_100.xyz>`.
#
#
# Training
@@ -44,7 +43,7 @@ mtt --help
# corresponding documentation page. We will use these minimal options to run an example
# training using the default hyperparameters of a SOAP BPNN model
#
# .. literalinclude:: ../../../../examples/basic_usage/options.yaml
# .. literalinclude:: options-basic.yaml
# :language: yaml
#
# For each training run a new output directory in the format
@@ -54,7 +53,7 @@
# in the current directory and type


mtt train options.yaml
mtt train options-basic.yaml

# %%
#
@@ -85,7 +84,7 @@ mtt train --help
# system and possible target values for evaluation. The system section of this
# ``eval.yaml`` is exactly the same as for a dataset in the ``options.yaml`` file.
#
# .. literalinclude:: ../../../../examples/basic_usage/eval.yaml
# .. literalinclude:: eval-basic.yaml
# :language: yaml
#
# Note that the ``targets`` section is optional. If the ``targets`` section is present,
@@ -97,7 +96,7 @@ # trained model and the second an option file containing the path of the dataset for
# trained model and the second an option file containing the path of the dataset for
# evaluation. The extensions of the model, if any, can be specified via the ``-e`` flag.

mtt eval model.pt eval.yaml -e extensions/
mtt eval model.pt eval-basic.yaml -e extensions/

# %%
#
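For readers who prefer to drive the same workflow from Python, here is a small sketch (not part of the PR) that mirrors the renamed shell commands above, using the same ``subprocess`` pattern that the data-preparation example below relies on:

.. code-block:: python

    # Sketch: run the renamed beginner example end to end from Python,
    # equivalent to the `mtt train` and `mtt eval` commands shown above.
    import subprocess

    subprocess.run(["mtt", "train", "options-basic.yaml"], check=True)
    subprocess.run(
        ["mtt", "eval", "model.pt", "eval-basic.yaml", "-e", "extensions/"],
        check=True,
    )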
@@ -189,7 +189,7 @@
structures = ase.io.read("carbon_reduced_100.xyz", index=":")

root = Path("carbon_reduced_100_memmap/")
root.mkdir()
root.mkdir(exist_ok=True)

ns_path = root / "ns.npy"
na_path = root / "na.npy"
@@ -236,7 +236,7 @@
#
# For example, you can use the following options file:
#
# .. literalinclude:: options.yaml
# .. literalinclude:: options-memmap.yaml
# :language: yaml

subprocess.run(["mtt", "train", "options.yaml"])
subprocess.run(["mtt", "train", "options-memmap.yaml"])
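A small sanity-check sketch (not part of the PR), assuming ``ns.npy`` and ``na.npy`` are ordinary NumPy arrays written into the memory-mapped dataset directory created above; the exact layout is defined by the collapsed part of the example:

.. code-block:: python

    # Sketch: inspect the arrays inside carbon_reduced_100_memmap/ without
    # loading them fully into memory (mmap_mode="r" keeps them on disk).
    import numpy as np

    ns = np.load("carbon_reduced_100_memmap/ns.npy", mmap_mode="r")
    na = np.load("carbon_reduced_100_memmap/na.npy", mmap_mode="r")
    print("ns:", ns.shape, ns.dtype)
    print("na:", na.shape, na.dtype)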
@@ -1,7 +1,8 @@
r"""
.. _fine-tuning:

Fine-tuning
===========
Fine-tune a pre-trained model
=============================

.. warning::

@@ -15,7 +16,6 @@

There is a complete example in :ref:`Fine-tune example <fine-tuning-example>`.


.. note::

Please note that the fine-tuning recommendations in this section are not universal
@@ -44,8 +44,8 @@
method: "full" # This stands for the full fine-tuning
read_from: path/to/checkpoint.ckpt

We recommend to use a lower learning rate than the one used for the original training, as
this will help stabilizing the training process. I.e. if the default learning rate is
We recommend using a lower learning rate than the one used for the original training,
as this will help stabilize the training process. For example, if the default learning rate is
``1e-4``, you can set it to ``1e-5`` or even lower, using the following in the
``options.yaml`` file:

@@ -59,19 +59,19 @@
will be taken from the checkpoint and not adapted to the new dataset.

The basic fine-tuning strategy is a good choice in the case when the level of theory
which is used for the original training is the same, or at least similar to the one used for
the new dataset. However, since this is not always the case, we also provide more advanced
fine-tuning strategies described below.
which is used for the original training is the same, or at least similar to the one used
for the new dataset. However, since this is not always the case, we also provide more
advanced fine-tuning strategies described below.


Fine-tuning model Heads
-----------------------

Adapting all the model weights to a new dataset is not always the best approach. If the new
dataset consist of the same or similar data computed with a slightly different level of theory
compared to the pre-trained models' dataset, you might want to keep the learned representations
of the crystal structures and only adapt the readout layers (i.e. the model heads) to the new
dataset.
Adapting all the model weights to a new dataset is not always the best approach. If the
new dataset consists of the same or similar data computed with a slightly different level
of theory compared to the pre-trained model's dataset, you might want to keep the
learned representations of the crystal structures and only adapt the readout layers
(i.e. the model heads) to the new dataset.

In this case, the ``mtt train`` command needs to be accompanied by the specific training
options in the ``options.yaml`` file. The following options need to be set:
@@ -98,18 +98,19 @@
edge heads and last layers to be fine-tuned.

We recommend to first start the fine-tuning including all the modules listed above and
experiment with their different combinations if needed. You might also consider using a lower
learning rate, e.g. ``1e-5`` or even lower, to stabilize the training process.
experiment with their different combinations if needed. You might also consider using a
lower learning rate, e.g. ``1e-5`` or even lower, to stabilize the training process.


LoRA Fine-tuning
----------------

If the conceptually new type of structures is introduced in the new dataset, tuning only the
model heads might not be sufficient. In this case, you might need to adapt the internal
representations of the crystal structures. This can be done using the LoRA technique. However,
in this case the model heads will be not adapted to the new dataset, so conceptually the
level of theory should be consistent with the one used for the pre-trained model.
If a conceptually new type of structure is introduced in the new dataset, tuning only
the model heads might not be sufficient. In this case, you might need to adapt the
internal representations of the crystal structures. This can be done using the LoRA
technique. However, in this case the model heads will not be adapted to the new dataset,
so conceptually the level of theory should be consistent with the one used for the
pre-trained model.

What is LoRA?
^^^^^^^^^^^^^
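Since the body of this section is collapsed in the diff, here is a generic PyTorch sketch of the LoRA idea (illustrative only, not metatrain's actual implementation): the pre-trained weight matrix is frozen and a low-rank update scaled by ``alpha / rank`` is learned on top of it.

.. code-block:: python

    # Generic LoRA sketch: y = W x + (alpha / rank) * B(A(x)), with the
    # pre-trained layer W frozen and only the low-rank factors A and B trained.
    import torch

    class LoRALinear(torch.nn.Module):
        def __init__(self, base: torch.nn.Linear, rank: int = 8, alpha: float = 16.0):
            super().__init__()
            self.base = base
            for parameter in self.base.parameters():
                parameter.requires_grad = False  # keep pre-trained weights fixed
            self.A = torch.nn.Linear(base.in_features, rank, bias=False)
            self.B = torch.nn.Linear(rank, base.out_features, bias=False)
            torch.nn.init.zeros_(self.B.weight)  # start as a zero update
            self.scaling = alpha / rank

        def forward(self, x: torch.Tensor) -> torch.Tensor:
            return self.base(x) + self.scaling * self.B(self.A(x))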
@@ -161,9 +162,9 @@
------------------------------------

If the new dataset is computed with a totally different level of theory compared to the
pre-trained model, which includes, for instance, the different composition energies,
or you want to fine-tune the model on a completely new target, you might need to consider
the transfer learning approach and introduce a new target in the
``options.yaml`` file. More details about this approach can be found in the
:ref:`Transfer Learning <transfer-learning>` section of the documentation.

pre-trained model, which includes, for instance, different composition energies, or if
you want to fine-tune the model on a completely new target, you might need to consider
the transfer learning approach and introduce a new target in the ``options.yaml`` file.
More details about this approach can be found in the :ref:`Transfer Learning
<transfer-learning>` section of the documentation.
"""