Skip to content

Commit 84fcd03

Browse files
authored
chore: log timings for loading embedder model (#129)
1 parent 13f8616 commit 84fcd03

File tree

2 files changed

+5
-3
lines changed

2 files changed

+5
-3
lines changed

mostlyai/qa/_sampling.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -291,14 +291,16 @@ def calculate_embeddings(
291291
progress_from: int | None = None,
292292
progress_to: int | None = None,
293293
) -> np.ndarray:
294-
t0 = time.time()
295294
# load embedder
295+
t0 = time.time()
296296
embedder = load_embedder()
297+
_LOG.info(f"loaded load_embedder in {time.time() - t0:.2f}s")
297298
# split into buckets for calculating embeddings to avoid memory issues and report continuous progress
298299
steps = progress_to - progress_from if progress_to is not None and progress_from is not None else 1
299300
buckets = np.array_split(strings, steps)
300301
buckets = [b for b in buckets if len(b) > 0]
301302
# calculate embeddings for each bucket
303+
t0 = time.time()
302304
embeds = []
303305
for i, bucket in enumerate(buckets, 1):
304306
embeds += [embedder.encode(bucket.tolist(), show_progress_bar=False)]

mostlyai/qa/assets/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -40,14 +40,14 @@ def load_tokenizer():
4040

4141
def load_embedder():
4242
"""
43-
Load the embedder model.
43+
Load the embedder model.
4444
Can deal with read-only cache folder by attempting to download the model if it is not locally available.
4545
Users can set MOSTLY_HF_HOME environment variable to override the default cache folder.
4646
"""
4747
from sentence_transformers import SentenceTransformer
4848

4949
model_name = "sentence-transformers/all-MiniLM-L6-v2"
50-
cache_folder=os.getenv("MOSTLY_HF_HOME")
50+
cache_folder = os.getenv("MOSTLY_HF_HOME")
5151
try:
5252
# First try loading from local cache
5353
return SentenceTransformer(model_name_or_path=model_name, cache_folder=cache_folder, local_files_only=True)

0 commit comments

Comments
 (0)