diff --git a/.github/workflows/architecture-tests.yml b/.github/workflows/architecture-tests.yml
index 4f412c5f49..e0bc06269a 100644
--- a/.github/workflows/architecture-tests.yml
+++ b/.github/workflows/architecture-tests.yml
@@ -33,5 +33,7 @@ jobs:
       - name: run architecture tests
         run: tox -e ${{ matrix.architecture-name }}-tests
         env:
+          # CI must always regenerate the test files (takes priority over USE_CACHE)
+          FORCE_REGENERATE: "true"
           # Use the CPU only version of torch when building/running the code
           PIP_EXTRA_INDEX_URL: https://download.pytorch.org/whl/cpu
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 588a85d823..e9574344de 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -20,5 +20,7 @@ jobs:
       - name: Test build integrity
         run: tox -e build
         env:
+          # CI must always regenerate the test files (takes priority over USE_CACHE)
+          FORCE_REGENERATE: "true"
           # Use the CPU only version of torch when building/running the code
           PIP_EXTRA_INDEX_URL: https://download.pytorch.org/whl/cpu
diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
index ec47d93599..baf8eb8ad5 100644
--- a/.github/workflows/docs.yml
+++ b/.github/workflows/docs.yml
@@ -23,6 +23,8 @@ jobs:
       - name: build documentation
         run: tox -e docs
         env:
+          # CI must always regenerate the test files (takes priority over USE_CACHE)
+          FORCE_REGENERATE: "true"
           # Use the CPU-only version of torch
           PIP_EXTRA_INDEX_URL: https://download.pytorch.org/whl/cpu
 
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index dd8d14339b..2d7f2b531a 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -44,6 +44,8 @@ jobs:
           tox -e tests
           coverage xml --data-file tests/.coverage
         env:
+          # CI must always regenerate the test files (takes priority over USE_CACHE)
+          FORCE_REGENERATE: "true"
           # Use the CPU only version of torch when building/running the code
           PIP_EXTRA_INDEX_URL: https://download.pytorch.org/whl/cpu
           HUGGINGFACE_TOKEN_METATRAIN: ${{ secrets.HUGGINGFACE_TOKEN }}
diff --git a/.gitignore b/.gitignore
index ff5d46f744..dce1093ada 100644
--- a/.gitignore
+++ b/.gitignore
@@ -179,3 +179,6 @@ docs/src/examples
 node_modules/
 package-lock.json
 package.json
+
+# commit hash the cached test models were generated from
+.data_version.txt
diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst
index bd7ed2ea37..774f1b8e02 100644
--- a/CONTRIBUTING.rst
+++ b/CONTRIBUTING.rst
@@ -89,6 +89,19 @@ testing it. Also, you may want to setup your editor to automatically apply the
 ` are plugins to do this with `all major editors
 `_.
 
+By default, the main test suite regenerates the necessary model files every time
+it runs. For faster local development, you can **opt-in** to caching these files
+by setting the ``USE_CACHE`` environment variable to ``1``:
+
+.. code-block:: bash
+
+    USE_CACHE=1 tox -e tests
+
+When caching is enabled, the script skips regeneration as long as the cached
+files exist, they were generated from the current commit, and there are no
+uncommitted changes in ``src/`` or in the training option files. Setting
+``FORCE_REGENERATE=true`` (as the CI does) always regenerates the files.
+
 If you want to test a specific archicture you can also do it. For example
 
 .. code-block:: bash
diff --git a/tests/resources/generate-outputs.sh b/tests/resources/generate-outputs.sh
index 921895d1a6..bed498f338 100755
--- a/tests/resources/generate-outputs.sh
+++ b/tests/resources/generate-outputs.sh
@@ -7,9 +7,69 @@ ROOT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 
 cd "$ROOT_DIR"
 
-mtt train options.yaml -o model-32-bit.pt -r base_precision=32
-mtt train options.yaml -o model-64-bit.pt -r base_precision=64
-mtt train options-pet.yaml -o model-pet.pt
+HASH_FILE=".data_version.txt"
+# Pathspecs whose uncommitted changes invalidate the cache. This script runs
+# from tests/resources, so the package sources need git's ":/" (repository
+# root) prefix; a plain "src/" would resolve to tests/resources/src/.
+WATCH_PATHS=(":/src/" "options.yaml" "options-pet.yaml")
+
+# An explicit FORCE_REGENERATE=true in the environment (set by the CI workflows)
+# wins over USE_CACHE; regenerating is also the default when nothing is set.
+REQUESTED_FORCE="${FORCE_REGENERATE:-}"
+FORCE_REGENERATE=true
+
+# Commit the generated files can be attributed to: HEAD when the watched
+# paths are clean, empty when they are dirty or git is not usable here.
+CURRENT_HASH=""
+if GIT_CHANGES=$(git status --porcelain -- "${WATCH_PATHS[@]}" 2>/dev/null); then
+    if [ -z "$GIT_CHANGES" ]; then
+        CURRENT_HASH=$(git rev-parse HEAD 2>/dev/null || true)
+    fi
+fi
+
+if [[ "$REQUESTED_FORCE" == "true" ]]; then
+    echo "FORCE_REGENERATE=true detected. Regenerating all files."
+elif [[ "${USE_CACHE:-0}" == "1" ]]; then
+    echo "USE_CACHE=1 detected. Attempting to use cached data."
+    if [ -z "$CURRENT_HASH" ]; then
+        echo "Cache is invalid: uncommitted changes or no usable git checkout. Must regenerate."
+    elif [ ! -f "$HASH_FILE" ]; then
+        echo "Cache is invalid: version file not found. Must regenerate."
+    elif [ "$(cat "$HASH_FILE")" != "$CURRENT_HASH" ]; then
+        echo "Cache is invalid: code version has changed. Must regenerate."
+    else
+        echo "Cache is valid. Will skip regeneration for existing files."
+        FORCE_REGENERATE=false
+    fi
+fi
+
+if [ "$FORCE_REGENERATE" = true ]; then
+    # Drop the old marker first so an interrupted run cannot leave it pointing
+    # at partially regenerated files.
+    rm -f "$HASH_FILE"
+fi
+
+# Regenerate if regeneration is forced (default) OR if a file is missing.
+if [ "$FORCE_REGENERATE" = true ] || [ ! -f "model-32-bit.pt" ]; then
+    mtt train options.yaml -o model-32-bit.pt -r base_precision=32
+fi
+
+if [ "$FORCE_REGENERATE" = true ] || [ ! -f "model-64-bit.pt" ]; then
+    mtt train options.yaml -o model-64-bit.pt -r base_precision=64
+fi
+
+if [ "$FORCE_REGENERATE" = true ] || [ ! -f "model-pet.pt" ]; then
+    mtt train options-pet.yaml -o model-pet.pt
+fi
+
+if [ "$FORCE_REGENERATE" = true ]; then
+    if [ -n "$CURRENT_HASH" ]; then
+        echo "Saving current git commit hash to version the data."
+        echo "$CURRENT_HASH" > "$HASH_FILE"
+    else
+        echo "Not recording a data version: files were not generated from a clean commit."
+    fi
+fi
 
 set +x # disable command echoing for sensitive private token check
 TOKEN_PRESENT=false