Skip to content

Commit 3e4c26f

Browse files
fix: handle read-only HF cache folder (#126)
1 parent 64efbdf commit 3e4c26f

File tree

1 file changed

+12
-1
lines changed

1 file changed

+12
-1
lines changed

mostlyai/qa/assets/__init__.py

Lines changed: 12 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -39,10 +39,21 @@ def load_tokenizer():
3939

4040

4141
def load_embedder():
    """
    Load the embedder model.

    Loading is attempted twice: first with ``local_files_only=True`` so that a
    model already present in the cache folder is used as-is, and, if that
    attempt raises, a second time with ``local_files_only=False`` so that the
    model is downloaded. This is what allows a read-only cache folder to be
    used as long as it already contains the model.

    Users can set the MOSTLY_HF_HOME environment variable to override the
    default cache folder; when it is unset, ``None`` is passed as
    ``cache_folder``.

    Returns:
        The ``SentenceTransformer`` instance for
        "sentence-transformers/all-MiniLM-L6-v2".

    Raises:
        Exception: whatever the second (download-enabled) attempt raises; the
            error from the first, local-only attempt is kept as its context.
    """
    # Imported lazily so the dependency is only loaded when an embedder is requested.
    from sentence_transformers import SentenceTransformer

    model_name = "sentence-transformers/all-MiniLM-L6-v2"
    cache_folder = os.getenv("MOSTLY_HF_HOME")
    try:
        # First try loading from the local cache only.
        return SentenceTransformer(model_name_or_path=model_name, cache_folder=cache_folder, local_files_only=True)
    except Exception:
        # Deliberately broad: any failure of the local-only attempt triggers
        # the fallback, which retries with downloading enabled.
        return SentenceTransformer(model_name_or_path=model_name, cache_folder=cache_folder, local_files_only=False)
4657

4758

4859
# Names exported by a star-import of this module; only load_embedder is listed.
__all__ = ["load_embedder"]

0 commit comments

Comments
 (0)