Skip to content

Commit e555ecc

Browse files
committed
Heuristics Implemented
1 parent 5e58245 commit e555ecc

23 files changed

+3156
-104
lines changed

diffrays/__init__.py

100644 → 100755
File mode changed.

diffrays/analyzer.py

100644 → 100755
Lines changed: 52 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,8 @@
88
import ida_domain
99
from ida_domain.database import IdaCommandOptions
1010
from ida_domain.names import DemangleFlags, SetNameFlags
11-
from diffrays.database import insert_function, insert_function_with_meta, compress_pseudo, init_db, upsert_binary_metadata, compute_and_store_diffs
11+
from diffrays.database import insert_function, insert_function_with_meta, insert_function_with_features, compress_pseudo, init_db, upsert_binary_metadata, compute_and_store_diffs
12+
from diffrays.heuristics import extract_function_features
1213
from diffrays.explorer import explore_database
1314
from diffrays.log import log
1415

@@ -169,35 +170,44 @@ def analyze_binary(db_path: str, version: str, debug: bool = False, error_stats:
169170
signature = ""
170171

171172
# Get pseudocode
173+
pseudo = None
172174
try:
173175
pseudo = db.functions.get_pseudocode(func)
174176
if not pseudo:
175177
if debug:
176178
log.debug(f"No pseudocode for function: {name}")
177-
skipped_count += 1
178-
continue
179+
# Don't skip - insert with empty pseudocode so function is still tracked
180+
pseudo = []
179181
except Exception as e:
180182
log.warning(f"Failed to get pseudocode for function {name}: {e}")
181-
skipped_count += 1
182-
continue
183+
# Don't skip - insert with empty pseudocode so function is still tracked
184+
pseudo = []
183185

184186
# Compress pseudocode
185187
try:
186-
compressed = compress_pseudo(pseudo)
188+
compressed = compress_pseudo(pseudo) if pseudo else compress_pseudo([""])
187189
if not compressed:
188-
log.warning(f"Failed to compress pseudocode for function {name}")
189-
skipped_count += 1
190-
continue
190+
log.warning(f"Failed to compress pseudocode for function {name}, using empty")
191+
compressed = compress_pseudo([""])
191192
except Exception as e:
192-
log.warning(f"Failed to compress pseudocode for function {name}: {e}")
193-
skipped_count += 1
194-
continue
193+
log.warning(f"Failed to compress pseudocode for function {name}: {e}, using empty")
194+
compressed = compress_pseudo([""])
195+
196+
# Extract function features for heuristics
197+
try:
198+
# Get binary base address for RVA calculation
199+
binary_base = db.minimum_ea if hasattr(db, 'minimum_ea') else 0
200+
features = extract_function_features(db, func, binary_base)
201+
except Exception as e:
202+
log.warning(f"Failed to extract features for function {name}: {e}")
203+
# Continue without features; the caller falls back to the basic metadata insert
204+
features = None
195205

196206
analyzed_count += 1
197207
if debug:
198208
print(f"\rFunctions Analyzed: {analyzed_count}/{total_functions} (Skipped: {skipped_count})", end="", flush=True)
199209

200-
yield name, compressed, func.start_ea, bb_count, signature
210+
yield name, compressed, func.start_ea, bb_count, signature, features
201211

202212
except Exception as e:
203213
error_msg = f"Error processing function {func_idx} at {func.start_ea:X}"
@@ -232,7 +242,7 @@ def analyze_binary(db_path: str, version: str, debug: bool = False, error_stats:
232242
traceback.print_exc()
233243

234244

235-
def run_diff(old_path, new_path, db_path):
245+
def run_diff(old_path, new_path, db_path, debug: bool = False, use_heuristics: bool = False):
236246
"""Run binary diff analysis between old and new binaries"""
237247

238248
# Initialize error tracking
@@ -318,11 +328,22 @@ def run_diff(old_path, new_path, db_path):
318328
print()
319329

320330
# Process old binary functions
321-
for name, compressed, addr, blocks, signature in analyze_binary(old_path, "old", debug=True, error_stats=error_stats):
331+
for result in analyze_binary(old_path, "old", debug=debug, error_stats=error_stats):
322332
try:
323-
insert_function_with_meta(conn, "old", name, compressed, addr, blocks, signature)
333+
# Handle both old format (5 items) and new format (6 items with features)
334+
if len(result) == 6:
335+
name, compressed, addr, blocks, signature, features = result
336+
if features:
337+
insert_function_with_features(conn, "old", name, compressed, features)
338+
else:
339+
insert_function_with_meta(conn, "old", name, compressed, addr, blocks, signature)
340+
else:
341+
name, compressed, addr, blocks, signature = result[:5]
342+
insert_function_with_meta(conn, "old", name, compressed, addr, blocks, signature)
324343
except Exception as e:
325344
try:
345+
name = result[0]
346+
compressed = result[1]
326347
insert_function(conn, "old", name, compressed)
327348
except Exception as e2:
328349
error_msg = f"Failed to insert function {name} from old binary"
@@ -377,11 +398,22 @@ def run_diff(old_path, new_path, db_path):
377398
print()
378399

379400
# Process new binary functions
380-
for name, compressed, addr, blocks, signature in analyze_binary(new_path, "new", debug=True, error_stats=error_stats):
401+
for result in analyze_binary(new_path, "new", debug=debug, error_stats=error_stats):
381402
try:
382-
insert_function_with_meta(conn, "new", name, compressed, addr, blocks, signature)
403+
# Handle both old format (5 items) and new format (6 items with features)
404+
if len(result) == 6:
405+
name, compressed, addr, blocks, signature, features = result
406+
if features:
407+
insert_function_with_features(conn, "new", name, compressed, features)
408+
else:
409+
insert_function_with_meta(conn, "new", name, compressed, addr, blocks, signature)
410+
else:
411+
name, compressed, addr, blocks, signature = result[:5]
412+
insert_function_with_meta(conn, "new", name, compressed, addr, blocks, signature)
383413
except Exception as e:
384414
try:
415+
name = result[0]
416+
compressed = result[1]
385417
insert_function(conn, "new", name, compressed)
386418
except Exception as e2:
387419
error_msg = f"Failed to insert function {name} from new binary"
@@ -400,7 +432,8 @@ def run_diff(old_path, new_path, db_path):
400432
# Compute and store diffs
401433
try:
402434
log.info("Computing diffs and populating diff_results table...")
403-
compute_and_store_diffs(conn)
435+
log.info(f"Using heuristics: {use_heuristics}")
436+
compute_and_store_diffs(conn, use_heuristics=use_heuristics)
404437
log.info("Diff computation completed successfully")
405438
except Exception as e:
406439
error_msg = "Failed to compute/store diffs"

diffrays/cli.py

100644 → 100755
Lines changed: 13 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -31,7 +31,7 @@
3131
| |/ /| | | | | | |\ \ (_| | |_| \__ \
3232
|___/ |_|_| |_| \_| \_\__,_|\__, |___/
3333
__/ |
34-
|___/ {Fore.YELLOW}v1.6.2 Pi{Style.RESET_ALL}
34+
|___/ {Fore.YELLOW}v2.0 Tau{Style.RESET_ALL}
3535
"""
3636

3737
def print_success(message):
@@ -87,17 +87,18 @@ def check_ida_available():
8787
# Only log warning if debug mode is enabled elsewhere
8888
return False
8989

90-
def run_diff_safe(old_path, new_path, output_db, log_file, debug_mode):
90+
def run_diff_safe(old_path, new_path, output_db, log_file, debug_mode, use_heuristics=False):
9191
"""Safely run diff analysis with proper error handling"""
9292
try:
9393
from diffrays.analyzer import run_diff
9494

9595
if debug_mode:
9696
log.info(f"Starting analysis: {old_path} -> {new_path}")
9797
log.info(f"Output database: {output_db}")
98+
log.info(f"Using heuristics: {use_heuristics}")
9899

99100
# print_info(f"Analyzing binaries...")
100-
run_diff(old_path, new_path, output_db)
101+
run_diff(old_path, new_path, output_db, debug=debug_mode, use_heuristics=use_heuristics)
101102

102103
if debug_mode:
103104
log.info("Analysis completed successfully!")
@@ -153,6 +154,7 @@ def main():
153154
diff_parser.add_argument("-o", "--output", help="SQLite output file (default: auto-generated)")
154155
diff_parser.add_argument("--log", action="store_true", help="Store logs in file")
155156
diff_parser.add_argument("--debug", action="store_true", help="Enable debug logging and verbose output")
157+
diff_parser.add_argument("--heuristic", action="store_true", help="Use heuristic-based function matching (default: match by function names only)")
156158

157159
# Server command
158160
server_parser = sub.add_parser("server", help="Launch web server to view diff results")
@@ -173,6 +175,7 @@ def main():
173175
autodiff_parser.add_argument("-o", "--output", help="SQLite output file (default: auto-generated)",)
174176
autodiff_parser.add_argument("--log", action="store_true", help="Store logs in file")
175177
autodiff_parser.add_argument("--debug", action="store_true", help="Enable debug logging and verbose output")
178+
autodiff_parser.add_argument("--heuristic", action="store_true", help="Use heuristic-based function matching (default: match by function names only)")
176179

177180
args = parser.parse_args()
178181

@@ -199,6 +202,8 @@ def main():
199202
print_config_line("Log File:", log_file or 'None')
200203
print_config_line("Debug Mode:", f"{Fore.GREEN}Enabled{Style.RESET_ALL}" if debug_mode else f"{Fore.RED}Disabled{Style.RESET_ALL}")
201204
print_config_line("Logging:", f"{Fore.GREEN}Enabled{Style.RESET_ALL}" if getattr(args, 'log', False) else f"{Fore.RED}Disabled{Style.RESET_ALL}")
205+
use_heuristics = getattr(args, 'heuristic', False)
206+
print_config_line("Heuristics:", f"{Fore.GREEN}Enabled{Style.RESET_ALL}" if use_heuristics else f"{Fore.YELLOW}Disabled{Style.RESET_ALL}")
202207
print_separator()
203208
print()
204209

@@ -209,7 +214,7 @@ def main():
209214
log.info(f"Logging to file: {log_file}")
210215

211216
# Run diff safely
212-
run_diff_safe(args.old, args.new, output_db, log_file, debug_mode)
217+
run_diff_safe(args.old, args.new, output_db, log_file, debug_mode, use_heuristics=use_heuristics)
213218

214219
elif args.command == "server":
215220
log_file = None
@@ -326,6 +331,8 @@ def main():
326331
print_config_line("Output DB:", args.output or f'{Fore.YELLOW}Auto-generated{Style.RESET_ALL}')
327332
print_config_line("Debug Mode:", f"{Fore.GREEN}Enabled{Style.RESET_ALL}" if debug_mode else f"{Fore.RED}Disabled{Style.RESET_ALL}")
328333
print_config_line("Logging:", f"{Fore.GREEN}Enabled{Style.RESET_ALL}" if getattr(args, 'log', False) else f"{Fore.RED}Disabled{Style.RESET_ALL}")
334+
use_heuristics = getattr(args, 'heuristic', False)
335+
print_config_line("Heuristics:", f"{Fore.GREEN}Enabled{Style.RESET_ALL}" if use_heuristics else f"{Fore.YELLOW}Disabled{Style.RESET_ALL}")
329336
print_separator()
330337
print()
331338

@@ -378,7 +385,8 @@ def main():
378385

379386
print_separator()
380387
print()
381-
run_diff_safe(vulnerable_file, patched_file, output_db, log_file, debug_mode)
388+
use_heuristics = getattr(args, 'heuristic', False)
389+
run_diff_safe(vulnerable_file, patched_file, output_db, log_file, debug_mode, use_heuristics=use_heuristics)
382390

383391
except Exception as e:
384392
if debug_mode:

0 commit comments

Comments
 (0)