2 files changed, +5 -3
lines changed Original file line number Diff line number Diff line change @@ -291,14 +291,16 @@ def calculate_embeddings(
291291 progress_from : int | None = None ,
292292 progress_to : int | None = None ,
293293) -> np .ndarray :
294- t0 = time .time ()
295294 # load embedder
295+ t0 = time .time ()
296296 embedder = load_embedder ()
297+ _LOG .info (f"loaded load_embedder in { time .time () - t0 :.2f} s" )
297298 # split into buckets for calculating embeddings to avoid memory issues and report continuous progress
298299 steps = progress_to - progress_from if progress_to is not None and progress_from is not None else 1
299300 buckets = np .array_split (strings , steps )
300301 buckets = [b for b in buckets if len (b ) > 0 ]
301302 # calculate embeddings for each bucket
303+ t0 = time .time ()
302304 embeds = []
303305 for i , bucket in enumerate (buckets , 1 ):
304306 embeds += [embedder .encode (bucket .tolist (), show_progress_bar = False )]
Original file line number Diff line number Diff line change @@ -40,14 +40,14 @@ def load_tokenizer():
4040
4141def load_embedder ():
4242 """
43- Load the embedder model.
43+ Load the embedder model.
4444 Can deal with read-only cache folder by attempting to download the model if it is not locally available.
4545 Users can set MOSTLY_HF_HOME environment variable to override the default cache folder.
4646 """
4747 from sentence_transformers import SentenceTransformer
4848
4949 model_name = "sentence-transformers/all-MiniLM-L6-v2"
50- cache_folder = os .getenv ("MOSTLY_HF_HOME" )
50+ cache_folder = os .getenv ("MOSTLY_HF_HOME" )
5151 try :
5252 # First try loading from local cache
5353 return SentenceTransformer (model_name_or_path = model_name , cache_folder = cache_folder , local_files_only = True )
You can’t perform that action at this time.
0 commit comments