88import ida_domain
99from ida_domain .database import IdaCommandOptions
1010from ida_domain .names import DemangleFlags , SetNameFlags
11- from diffrays .database import insert_function , insert_function_with_meta , compress_pseudo , init_db , upsert_binary_metadata , compute_and_store_diffs
11+ from diffrays .database import insert_function , insert_function_with_meta , insert_function_with_features , compress_pseudo , init_db , upsert_binary_metadata , compute_and_store_diffs
12+ from diffrays .heuristics import extract_function_features
1213from diffrays .explorer import explore_database
1314from diffrays .log import log
1415
@@ -169,35 +170,44 @@ def analyze_binary(db_path: str, version: str, debug: bool = False, error_stats:
169170 signature = ""
170171
171172 # Get pseudocode
173+ pseudo = None
172174 try :
173175 pseudo = db .functions .get_pseudocode (func )
174176 if not pseudo :
175177 if debug :
176178 log .debug (f"No pseudocode for function: { name } " )
177- skipped_count += 1
178- continue
179+ # Don't skip - insert with empty pseudocode so function is still tracked
180+ pseudo = []
179181 except Exception as e :
180182 log .warning (f"Failed to get pseudocode for function { name } : { e } " )
181- skipped_count += 1
182- continue
183+ # Don't skip - insert with empty pseudocode so function is still tracked
184+ pseudo = []
183185
184186 # Compress pseudocode
185187 try :
186- compressed = compress_pseudo (pseudo )
188+ compressed = compress_pseudo (pseudo ) if pseudo else compress_pseudo ([ "" ])
187189 if not compressed :
188- log .warning (f"Failed to compress pseudocode for function { name } " )
189- skipped_count += 1
190- continue
190+ log .warning (f"Failed to compress pseudocode for function { name } , using empty" )
191+ compressed = compress_pseudo (["" ])
191192 except Exception as e :
192- log .warning (f"Failed to compress pseudocode for function { name } : { e } " )
193- skipped_count += 1
194- continue
193+ log .warning (f"Failed to compress pseudocode for function { name } : { e } , using empty" )
194+ compressed = compress_pseudo (["" ])
195+
196+ # Extract function features for heuristics
197+ try :
198+ # Get binary base address for RVA calculation
199+ binary_base = db .minimum_ea if hasattr (db , 'minimum_ea' ) else 0
200+ features = extract_function_features (db , func , binary_base )
201+ except Exception as e :
202+ log .warning (f"Failed to extract features for function { name } : { e } " )
203+ # Feature extraction is best-effort: fall back to None so the caller stores only basic metadata
204+ features = None
195205
196206 analyzed_count += 1
197207 if debug :
198208 print (f"\r Functions Analyzed: { analyzed_count } /{ total_functions } (Skipped: { skipped_count } )" , end = "" , flush = True )
199209
200- yield name , compressed , func .start_ea , bb_count , signature
210+ yield name , compressed , func .start_ea , bb_count , signature , features
201211
202212 except Exception as e :
203213 error_msg = f"Error processing function { func_idx } at { func .start_ea :X} "
@@ -232,7 +242,7 @@ def analyze_binary(db_path: str, version: str, debug: bool = False, error_stats:
232242 traceback .print_exc ()
233243
234244
235- def run_diff (old_path , new_path , db_path ):
245+ def run_diff (old_path , new_path , db_path , debug : bool = False , use_heuristics : bool = False ):
236246 """Run binary diff analysis between old and new binaries"""
237247
238248 # Initialize error tracking
@@ -318,11 +328,22 @@ def run_diff(old_path, new_path, db_path):
318328 print ()
319329
320330 # Process old binary functions
321- for name , compressed , addr , blocks , signature in analyze_binary (old_path , "old" , debug = True , error_stats = error_stats ):
331+ for result in analyze_binary (old_path , "old" , debug = debug , error_stats = error_stats ):
322332 try :
323- insert_function_with_meta (conn , "old" , name , compressed , addr , blocks , signature )
333+ # Handle both old format (5 items) and new format (6 items with features)
334+ if len (result ) == 6 :
335+ name , compressed , addr , blocks , signature , features = result
336+ if features :
337+ insert_function_with_features (conn , "old" , name , compressed , features )
338+ else :
339+ insert_function_with_meta (conn , "old" , name , compressed , addr , blocks , signature )
340+ else :
341+ name , compressed , addr , blocks , signature = result [:5 ]
342+ insert_function_with_meta (conn , "old" , name , compressed , addr , blocks , signature )
324343 except Exception as e :
325344 try :
345+ name = result [0 ]
346+ compressed = result [1 ]
326347 insert_function (conn , "old" , name , compressed )
327348 except Exception as e2 :
328349 error_msg = f"Failed to insert function { name } from old binary"
@@ -377,11 +398,22 @@ def run_diff(old_path, new_path, db_path):
377398 print ()
378399
379400 # Process new binary functions
380- for name , compressed , addr , blocks , signature in analyze_binary (new_path , "new" , debug = True , error_stats = error_stats ):
401+ for result in analyze_binary (new_path , "new" , debug = debug , error_stats = error_stats ):
381402 try :
382- insert_function_with_meta (conn , "new" , name , compressed , addr , blocks , signature )
403+ # Handle both old format (5 items) and new format (6 items with features)
404+ if len (result ) == 6 :
405+ name , compressed , addr , blocks , signature , features = result
406+ if features :
407+ insert_function_with_features (conn , "new" , name , compressed , features )
408+ else :
409+ insert_function_with_meta (conn , "new" , name , compressed , addr , blocks , signature )
410+ else :
411+ name , compressed , addr , blocks , signature = result [:5 ]
412+ insert_function_with_meta (conn , "new" , name , compressed , addr , blocks , signature )
383413 except Exception as e :
384414 try :
415+ name = result [0 ]
416+ compressed = result [1 ]
385417 insert_function (conn , "new" , name , compressed )
386418 except Exception as e2 :
387419 error_msg = f"Failed to insert function { name } from new binary"
@@ -400,7 +432,8 @@ def run_diff(old_path, new_path, db_path):
400432 # Compute and store diffs
401433 try :
402434 log .info ("Computing diffs and populating diff_results table..." )
403- compute_and_store_diffs (conn )
435+ log .info (f"Using heuristics: { use_heuristics } " )
436+ compute_and_store_diffs (conn , use_heuristics = use_heuristics )
404437 log .info ("Diff computation completed successfully" )
405438 except Exception as e :
406439 error_msg = "Failed to compute/store diffs"
0 commit comments