 from torch import Tensor
 from transformers import PreTrainedTokenizer, PreTrainedTokenizerFast

+from delphi import logger
+
 from ..config import ConstructorConfig
 from .latents import (
     ActivatingExample,
@@ -25,7 +27,7 @@
 def get_model(name: str, device: str = "cuda") -> SentenceTransformer:
     global model_cache
     if (name, device) not in model_cache:
-        print(f"Loading model {name} on device {device}")
+        logger.info(f"Loading model {name} on device {device}")
         model_cache[(name, device)] = SentenceTransformer(name, device=device)
     return model_cache[(name, device)]

@@ -284,7 +286,9 @@ def constructor(
         for toks, acts in zip(token_windows, act_windows)
     ]
     if len(record.examples) < min_examples:
-        print(f"Not enough examples to explain the latent: {len(record.examples)}")
+        logger.warning(
+            f"Not enough examples to explain the latent: {len(record.examples)}"
+        )
         # Not enough examples to explain the latent
         return None

@@ -404,7 +408,7 @@ def faiss_non_activation_windows(

     # Check if we have enough non-activating examples
     if available_indices.numel() < n_not_active:
-        print("Not enough non-activating examples available")
+        logger.warning("Not enough non-activating examples available")
         return []

     # Reshape tokens to get context windows
@@ -426,7 +430,7 @@ def faiss_non_activation_windows(
     ]

     if not activating_texts:
-        print("No activating examples available")
+        logger.warning("No activating examples available")
         return []

     # Create unique cache keys for both activating and non-activating texts
@@ -451,17 +455,17 @@ def faiss_non_activation_windows(
     if cache_enabled and non_activating_cache_file.exists():
         try:
             index = faiss.read_index(str(non_activating_cache_file), faiss.IO_FLAG_MMAP)
-            print(f"Loaded non-activating index from {non_activating_cache_file}")
+            logger.info(f"Loaded non-activating index from {non_activating_cache_file}")
         except Exception as e:
-            print(f"Error loading cached embeddings: {repr(e)}")
+            logger.warning(f"Error loading cached embeddings: {repr(e)}")

     if index is None:
-        print("Decoding non-activating tokens...")
+        logger.info("Decoding non-activating tokens...")
         non_activating_texts = [
             "".join(tokenizer.batch_decode(tokens)) for tokens in non_activating_tokens
         ]

-        print("Computing non-activating embeddings...")
+        logger.info("Computing non-activating embeddings...")
         non_activating_embeddings = get_model(embedding_model).encode(
             non_activating_texts, show_progress_bar=False
         )
@@ -472,26 +476,30 @@ def faiss_non_activation_windows(
         if cache_enabled:
             os.makedirs(cache_path, exist_ok=True)
             faiss.write_index(index, str(non_activating_cache_file))
-            print(f"Cached non-activating embeddings to {non_activating_cache_file}")
+            logger.info(
+                f"Cached non-activating embeddings to {non_activating_cache_file}"
+            )

     activating_embeddings = None
     if cache_enabled and activating_cache_file.exists():
         try:
             activating_embeddings = np.load(activating_cache_file)
-            print(f"Loaded cached activating embeddings from {activating_cache_file}")
+            logger.info(
+                f"Loaded cached activating embeddings from {activating_cache_file}"
+            )
         except Exception as e:
-            print(f"Error loading cached embeddings: {repr(e)}")
+            logger.warning(f"Error loading cached embeddings: {repr(e)}")
     # Compute embeddings for activating examples if not cached
     if activating_embeddings is None:
-        print("Computing activating embeddings...")
+        logger.info("Computing activating embeddings...")
         activating_embeddings = get_model(embedding_model).encode(
             activating_texts, show_progress_bar=False
         )
         # Cache the embeddings
         if cache_enabled:
             os.makedirs(cache_path, exist_ok=True)
             np.save(activating_cache_file, activating_embeddings)
-            print(f"Cached activating embeddings to {activating_cache_file}")
+            logger.info(f"Cached activating embeddings to {activating_cache_file}")

     # Search for the nearest neighbors to each activating example
     collected_indices = set()
@@ -618,7 +626,9 @@ def neighbour_non_activation_windows(
         )
         number_examples += examples_used
     if len(all_examples) == 0:
-        print("No examples found, falling back to random non-activating examples")
+        logger.warning(
+            "No examples found, falling back to random non-activating examples"
+        )
         non_active_indices = not_active_mask.nonzero(as_tuple=False).squeeze()

         return random_non_activating_windows(
@@ -655,7 +665,7 @@ def random_non_activating_windows(
     # If this happens it means that the latent is active in every window,
     # so it is a bad latent
     if available_indices.numel() < n_not_active:
-        print("No available randomly sampled non-activating sequences")
+        logger.warning("No available randomly sampled non-activating sequences")
         return []
     else:
         random_indices = torch.randint(
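For reference, a minimal sketch of how a caller could surface the messages this change routes through `delphi.logger`, assuming that object is a standard `logging.Logger`; the handler and format below are illustrative only and not part of this diff:

import logging

from delphi import logger

# Illustrative configuration: attach a stream handler and lower the level so
# that the logger.info(...) and logger.warning(...) calls above are visible.
handler = logging.StreamHandler()
handler.setFormatter(logging.Formatter("%(levelname)s %(name)s: %(message)s"))
logger.addHandler(handler)
logger.setLevel(logging.INFO)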